001    /*
002     * The contents of this file are subject to the Mozilla Public
003     * License Version 1.1 (the "License"); you may not use this file
004     * except in compliance with the License. You may obtain a copy of
005     * the License at http://www.mozilla.org/MPL/
006     *
007     * Software distributed under the License is distributed on an "AS
008     * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009     * implied. See the License for the specific language governing
010     * rights and limitations under the License.
011     *
012     * The Original Code is Knowtator.
013     *
014     * The Initial Developer of the Original Code is University of Colorado.  
015     * Copyright (C) 2005 - 2008.  All Rights Reserved.
016     *
017     * Knowtator was developed by the Center for Computational Pharmacology
018     * (http://compbio.uchsc.edu) at the University of Colorado Health 
019     *  Sciences Center School of Medicine with support from the National 
020     *  Library of Medicine.  
021     *
022     * Current information about Knowtator can be obtained at 
023     * http://knowtator.sourceforge.net/
024     *
025     * Contributor(s):
026     *   Philip V. Ogren <philip@ogren.info> (Original Author)
027     */
028    package edu.uchsc.ccp.iaa.matcher;
029    
030    import java.util.ArrayList;
031    import java.util.HashSet;
032    import java.util.List;
033    import java.util.Set;
034    
035    import edu.uchsc.ccp.iaa.Annotation;
036    import edu.uchsc.ccp.iaa.IAA;
037    
038    public class SpansOverlapSimpleFeatureMatcher implements Matcher {
039    
040            /**
041             * This method will return an annotation that has overlapping spans and the
042             * same simple features. It is not required that the annotation class match.
043             * Preference will be given to an annotation that has the same class and
044             * spans. Secondary preference will be given to an annotation with the same
045             * span followed by an the shortest annotation with overlapping spans and
046             * the same simple features. If no annotation has overlapping spans and the
047             * same simple features, then null is returned.
048             * 
049             * @param matchResult
050             *            will be set to:
051             *            <ul>
052             *            <li>TRIVIAL_NONMATCH if there are no overlapping annotations
053             *            with the passed in annotation
054             *            <li>NONTRIVIAL_MATCH if there is an annotation that is
055             *            overlapping and the Annotation.compareSimpleFeatures returns
056             *            NONTRIVIAL_MATCH
057             *            <li>TRIVIAL_MATCH if there is an annotation that is
058             *            overlapping and the Annotation.compareSimpleFeatures returns
059             *            TRIVIAL_MATCH <br>
060             *            Note: if there is a trivial_match then there cannot possibly
061             *            be a NONTRIVIAL_MATCH because one of the simple features of
062             *            the passed in annotation must have a null value or there are
063             *            no simple features.
064             *            <li>NONTRIVIAL_NONMATCH if there an annotation that is
065             *            overlapping and the Annotation.compareSimpleFeatures returns
066             *            NONTRIVIAL_NONMATCH
067             *            <li>TRIVIAL_NONMATCH if there is no match or non-trivial
068             *            non-match found.
069             * @return will return the first nontrivial match that it finds preferring
070             * @see edu.uchsc.ccp.iaa.matcher.Matcher#match(Annotation, String, Set,
071             *      IAA, MatchResult)
072             * @see edu.uchsc.ccp.iaa.matcher.MatchResult#NONTRIVIAL_MATCH
073             * @see edu.uchsc.ccp.iaa.matcher.MatchResult#NONTRIVIAL_NONMATCH
074             * @see edu.uchsc.ccp.iaa.matcher.MatchResult#TRIVIAL_MATCH
075             * @see edu.uchsc.ccp.iaa.matcher.MatchResult#TRIVIAL_NONMATCH
076             */
077    
078            public Annotation match(Annotation annotation, String compareSetName, Set<Annotation> excludeAnnotations, IAA iaa,
079                            MatchResult matchResult) {
080    
081                    Annotation spansExactSimpleFeatureMatch = SpansExactSimpleFeatureMatcher.match(annotation, compareSetName, iaa,
082                                    excludeAnnotations, matchResult);
083    
084                    // if TRIVIAL_MATCH then we do not have to worry about there being an
085                    // overlapping NONTRIVIAL_MATCH further down
086                    // because we know that a trivial match is the best we can do.
087                    if (spansExactSimpleFeatureMatch != null
088                                    && (matchResult.getResult() == MatchResult.NONTRIVIAL_MATCH || matchResult.getResult() == MatchResult.TRIVIAL_MATCH)) {
089                            return spansExactSimpleFeatureMatch;
090                    }
091    
092                    Set<Annotation> candidateAnnotations = new HashSet<Annotation>(iaa.getOverlappingAnnotations(annotation,
093                                    compareSetName));
094                    candidateAnnotations.removeAll(excludeAnnotations);
095    
096                    // we are going to collect all matches because we want to return the
097                    // shortest of the matches if there is more than one.
098                    List<Annotation> nontrivialMatches = new ArrayList<Annotation>();
099                    List<Annotation> trivialMatches = new ArrayList<Annotation>();
100    
101                    boolean nontrivialNonmatch = false;
102    
103                    for (Annotation candidateAnnotation : candidateAnnotations) {
104                            if (!excludeAnnotations.contains(candidateAnnotation)) {
105                                    int result = Annotation.compareSimpleFeatures(annotation, candidateAnnotation);
106                                    if (result == MatchResult.NONTRIVIAL_MATCH) {
107                                            nontrivialMatches.add(candidateAnnotation);
108                                    } else if (result == MatchResult.TRIVIAL_MATCH) {
109                                            trivialMatches.add(candidateAnnotation);
110                                    }
111                                    if (result == MatchResult.NONTRIVIAL_NONMATCH) {
112                                            nontrivialNonmatch = true;
113                                    }
114                            }
115                    }
116    
117                    if (nontrivialMatches.size() > 0) {
118                            matchResult.setResult(MatchResult.NONTRIVIAL_MATCH);
119                            if (nontrivialMatches.size() == 1) {
120                                    return nontrivialMatches.iterator().next();
121                            } else {
122                                    return Annotation.getShortestAnnotation(nontrivialMatches);
123                            }
124                    }
125                    if (trivialMatches.size() > 0) {
126                            matchResult.setResult(MatchResult.TRIVIAL_MATCH);
127                            if (trivialMatches.size() == 1) {
128                                    return trivialMatches.iterator().next();
129                            } else {
130                                    return Annotation.getShortestAnnotation(trivialMatches);
131                            }
132                    }
133    
134                    if (nontrivialNonmatch)
135                            matchResult.setResult(MatchResult.NONTRIVIAL_NONMATCH);
136                    else
137                            matchResult.setResult(MatchResult.TRIVIAL_NONMATCH);
138                    return null;
139            }
140    
141            public String getName() {
142                    return "Simple slots matcher (with overlapping spans)";
143            }
144    
145            public String getDescription() {
146                    return "Annotations match if they have overlapping spans and the same value for simple slots (e.g. slots that are primitive values such as integer and String).  Only slots that are specified must match.";
147            }
148    
149            public boolean returnsTrivials() {
150                    return true;
151            }
152    
153    }