001    /*
002     * The contents of this file are subject to the Mozilla Public
003     * License Version 1.1 (the "License"); you may not use this file
004     * except in compliance with the License. You may obtain a copy of
005     * the License at http://www.mozilla.org/MPL/
006     *
007     * Software distributed under the License is distributed on an "AS
008     * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009     * implied. See the License for the specific language governing
010     * rights and limitations under the License.
011     *
012     * The Original Code is Knowtator.
013     *
014     * The Initial Developer of the Original Code is University of Colorado.  
015     * Copyright (C) 2005-2008.  All Rights Reserved.
016     *
017     * Knowtator was developed by the Center for Computational Pharmacology
018     * (http://compbio.uchcs.edu) at the University of Colorado Health 
019     *  Sciences Center School of Medicine with support from the National 
020     *  Library of Medicine.  
021     *
022     * Current information about Knowtator can be obtained at 
023     * http://knowtator.sourceforge.net/
024     *
025     * Contributor(s):
026     *   Philip V. Ogren <philip@ogren.info> (Original Author)
027     */
028    
029    package edu.uchsc.ccp.iaa;
030    
031    import java.lang.reflect.Constructor;
032    import java.lang.reflect.InvocationTargetException;
033    import java.util.Collections;
034    import java.util.HashMap;
035    import java.util.HashSet;
036    import java.util.Map;
037    import java.util.Set;
038    
039    import edu.uchsc.ccp.iaa.matcher.MatchResult;
040    import edu.uchsc.ccp.iaa.matcher.Matcher;
041    
042    public class IAA {
043            Set<String> annotationClasses;
044    
045            Set<String> setNames;
046    
047            Set<Annotation> annotations;
048    
049            Set<Annotation> emptyAnnotationSet;
050    
051            // key is the name of an annotations set, value is a set of annotations
052            Map<String, Set<Annotation>> annotationSets;
053    
054            // key is an annotation set, value is a map whose value is an annotation
055            // class
056            // and values is the set of annotations in the set having that class.
057            Map<String, Map<String, Set<Annotation>>> class2AnnotationsMap;
058    
059            // key is an annotation set, value is a annotationSpanIndex for the
060            // annotations in that set.
061            Map<String, AnnotationSpanIndex> spanIndexes;
062    
063            // key is an annotation set, value is a set of annotations that are
064            // considered matches.
065            Map<String, Set<Annotation>> allwayMatches;
066    
067            Map<String, Set<Annotation>> trivialAllwayMatches;
068    
069            Map<String, Set<Annotation>> nontrivialAllwayMatches;
070    
071            // key is an annotation set, value is a set of annotations that are
072            // considered non-matches.
073            Map<String, Set<Annotation>> allwayNonmatches;
074    
075            Map<String, Set<Annotation>> trivialAllwayNonmatches;
076    
077            Map<String, Set<Annotation>> nontrivialAllwayNonmatches;
078    
079            // key is an annotation, value is the set of n annotations that it was
080            // matched with in n-way IAA.
081            Map<Annotation, Set<Annotation>> allwayMatchSets;
082    
083            // key is an annotation set that is considered gold standard by which other
084            // annotation sets are compared,
085            // value is a map whose key is the annotation set being compared to gold
086            // standard and whose value are annotations (from the
087            // gold standard set) that are matches.
088    
089            Map<String, Map<String, Set<Annotation>>> pairwiseMatches;
090    
091            Map<String, Map<String, Set<Annotation>>> trivialPairwiseMatches;
092    
093            Map<String, Map<String, Set<Annotation>>> nontrivialPairwiseMatches;
094    
095            Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches;
096    
097            Map<String, Map<String, Set<Annotation>>> trivialPairwiseNonmatches;
098    
099            Map<String, Map<String, Set<Annotation>>> nontrivialPairwiseNonmatches;
100    
101            Map<Annotation, Set<Annotation>> pairwiseMatchPairs;
102    
103            Map<String, Object> matcherInfo;
104    
105            public IAA(Set<String> setNames) {
106                    this.setNames = setNames;
107                    annotationClasses = new HashSet<String>();
108    
109                    emptyAnnotationSet = Collections.unmodifiableSet(new HashSet<Annotation>());
110    
111                    Set<Annotation> emptySet = Collections.emptySet();
112                    setAnnotations(emptySet);
113                    reset();
114            }
115    
116            public IAA(Set<String> setNames, Set<Annotation> annotations) {
117                    this.setNames = setNames;
118                    annotationClasses = new HashSet<String>();
119                    setAnnotations(annotations);
120                    reset();
121            }
122    
123            public void reset() {
124                    allwayMatches = new HashMap<String, Set<Annotation>>();
125                    trivialAllwayMatches = new HashMap<String, Set<Annotation>>();
126                    nontrivialAllwayMatches = new HashMap<String, Set<Annotation>>();
127                    allwayNonmatches = new HashMap<String, Set<Annotation>>();
128                    trivialAllwayNonmatches = new HashMap<String, Set<Annotation>>();
129                    nontrivialAllwayNonmatches = new HashMap<String, Set<Annotation>>();
130    
131                    allwayMatchSets = new HashMap<Annotation, Set<Annotation>>();
132    
133                    pairwiseMatches = new HashMap<String, Map<String, Set<Annotation>>>();
134                    trivialPairwiseMatches = new HashMap<String, Map<String, Set<Annotation>>>();
135                    nontrivialPairwiseMatches = new HashMap<String, Map<String, Set<Annotation>>>();
136                    pairwiseNonmatches = new HashMap<String, Map<String, Set<Annotation>>>();
137                    trivialPairwiseNonmatches = new HashMap<String, Map<String, Set<Annotation>>>();
138                    nontrivialPairwiseNonmatches = new HashMap<String, Map<String, Set<Annotation>>>();
139    
140                    pairwiseMatchPairs = new HashMap<Annotation, Set<Annotation>>();
141    
142                    for (String setName : setNames) {
143                            allwayMatches.put(setName, new HashSet<Annotation>());
144                            trivialAllwayMatches.put(setName, new HashSet<Annotation>());
145                            nontrivialAllwayMatches.put(setName, new HashSet<Annotation>());
146                            allwayNonmatches.put(setName, new HashSet<Annotation>());
147                            trivialAllwayNonmatches.put(setName, new HashSet<Annotation>());
148                            nontrivialAllwayNonmatches.put(setName, new HashSet<Annotation>());
149    
150                            pairwiseMatches.put(setName, new HashMap<String, Set<Annotation>>());
151                            trivialPairwiseMatches.put(setName, new HashMap<String, Set<Annotation>>());
152                            nontrivialPairwiseMatches.put(setName, new HashMap<String, Set<Annotation>>());
153                            pairwiseNonmatches.put(setName, new HashMap<String, Set<Annotation>>());
154                            trivialPairwiseNonmatches.put(setName, new HashMap<String, Set<Annotation>>());
155                            nontrivialPairwiseNonmatches.put(setName, new HashMap<String, Set<Annotation>>());
156    
157                            for (String compareSet : annotationSets.keySet()) {
158                                    if (!setName.equals(compareSet)) {
159                                            pairwiseMatches.get(setName).put(compareSet, new HashSet<Annotation>());
160                                            trivialPairwiseMatches.get(setName).put(compareSet, new HashSet<Annotation>());
161                                            nontrivialPairwiseMatches.get(setName).put(compareSet, new HashSet<Annotation>());
162                                            pairwiseNonmatches.get(setName).put(compareSet, new HashSet<Annotation>());
163                                            trivialPairwiseNonmatches.get(setName).put(compareSet, new HashSet<Annotation>());
164                                            nontrivialPairwiseNonmatches.get(setName).put(compareSet, new HashSet<Annotation>());
165                                    }
166                            }
167                    }
168    
169            }
170    
171            public void setAnnotations(Set<Annotation> annotations) {
172                    this.annotations = annotations;
173                    annotationSets = new HashMap<String, Set<Annotation>>();
174                    for (String setName : setNames) {
175                            annotationSets.put(setName, new HashSet<Annotation>());
176                    }
177    
178                    class2AnnotationsMap = new HashMap<String, Map<String, Set<Annotation>>>();
179                    spanIndexes = new HashMap<String, AnnotationSpanIndex>();
180    
181                    for (Annotation annotation : annotations) {
182                            String setName = annotation.getSetName();
183                            String annotationClass = annotation.getAnnotationClass();
184                            if (annotationClass != null)
185                                    annotationClasses.add(annotationClass);
186                            // throw exception here if there is a setName in the annotations
187                            // that was not passed in.
188                            annotationSets.get(setName).add(annotation);
189                    }
190    
191                    for (String setName : setNames) {
192                            Set<Annotation> setAnnotations = annotationSets.get(setName);
193    
194                            spanIndexes.put(setName, new AnnotationSpanIndex(setAnnotations));
195    
196                            Map<String, Set<Annotation>> classAnnotations = new HashMap<String, Set<Annotation>>();
197                            class2AnnotationsMap.put(setName, classAnnotations);
198    
199                            for (Annotation setAnnotation : setAnnotations) {
200                                    String annotationClass = setAnnotation.getAnnotationClass();
201                                    if (!classAnnotations.containsKey(annotationClass)) {
202                                            classAnnotations.put(annotationClass, new HashSet<Annotation>());
203                                    }
204                                    classAnnotations.get(annotationClass).add(setAnnotation);
205                            }
206                    }
207            }
208    
209            public void allwayIAA(Class matcherClass) throws NoSuchMethodException, InstantiationException,
210                            IllegalAccessException, InvocationTargetException, IAAException {
211                    Constructor constructor = matcherClass.getConstructor();
212                    Matcher matcher = (Matcher) constructor.newInstance();
213                    allwayIAA(matcher);
214            }
215    
216            public void allwayIAA(Matcher matcher) throws IAAException {
217                    /*
218                     * At the moment an annotation is found to be a match, there are n-1
219                     * other annotations that are also found to be a match (an annotation
220                     * for each of the other annotators). We will gather all matches as we
221                     * discover them so that a multiple annotations will not match with an
222                     * annotation that has already been matched. This might happen if, for
223                     * example, one annotator mistakenly created a duplicate annotation. We
224                     * would only want to consider one of them a match. All annotations that
225                     * have been found to be a match will be put in
226                     * matchedAnnotationsAllway.
227                     */
228                    Set<Annotation> matchedAnnotations = new HashSet<Annotation>();
229                    for (Annotation annotation : annotations) {
230                            String setName = annotation.getSetName();
231                            if (!matchedAnnotations.contains(annotation)) {
232                                    MatchResult matchResult = new MatchResult();
233                                    // just because an annotation matches with another annotation
234                                    // from each
235                                    // of the other sets, that does not mean the other annotations
236                                    // match with
237                                    // each other. This is particularly true for 'overlapping' span
238                                    // criteria.
239                                    Set<Annotation> matches = match(annotation, matchedAnnotations, matcher, matchResult);
240                                    if (matches != null) {
241                                            allwayMatches.get(setName).add(annotation);
242                                            Set<Annotation> allMatches = new HashSet<Annotation>(matches);
243                                            allMatches.add(annotation);
244                                            allwayMatchSets.put(annotation, allMatches);
245    
246                                            for (Annotation match : matches) {
247                                                    String matchedSet = match.getSetName();
248                                                    allwayMatches.get(matchedSet).add(match);
249                                                    allwayMatchSets.put(match, allMatches);
250                                            }
251                                            if (matchResult.getResult() == MatchResult.NONTRIVIAL_MATCH) {
252                                                    nontrivialAllwayMatches.get(setName).add(annotation);
253                                                    for (Annotation match : matches) {
254                                                            String matchedSet = match.getSetName();
255                                                            nontrivialAllwayMatches.get(matchedSet).add(match);
256                                                    }
257    
258                                            } else if (matchResult.getResult() == MatchResult.TRIVIAL_MATCH) {
259                                                    trivialAllwayMatches.get(setName).add(annotation);
260                                                    for (Annotation match : matches) {
261                                                            String matchedSet = match.getSetName();
262                                                            trivialAllwayMatches.get(matchedSet).add(match);
263                                                    }
264                                            } else {
265                                                    // needs to either be an error - or we need a lot more
266                                                    // descriptive information that a user can report back
267                                                    // to me.
268                                                    throw new IAAException(
269                                                                    "Match algorithm resulted in a NONTRIVIAL_MATCH or TRIVIAL_MATCH, but it also returned null.");
270                                            }
271    
272                                            matchedAnnotations.add(annotation);
273                                            matchedAnnotations.addAll(matches);
274                                    } else {
275                                            allwayNonmatches.get(setName).add(annotation);
276                                            if (matchResult.getResult() == MatchResult.NONTRIVIAL_NONMATCH)
277                                                    nontrivialAllwayNonmatches.get(setName).add(annotation);
278                                            else if (matchResult.getResult() == MatchResult.TRIVIAL_NONMATCH)
279                                                    trivialAllwayNonmatches.get(setName).add(annotation);
280                                            else {
281                                                    throw new IAAException(
282                                                                    "Match algorithm resulted in a NONTRIVIAL_NONMATCH or TRIVIAL_NONMATCH, but the match algorithm did not return null.");
283                                            }
284                                    }
285                            }
286                    }
287            }
288    
289            /**
290             * This method performs pairwise IAA for each combination of annotators.
291             * 
292             */
293            public void pairwiseIAA(Class matchClass) throws NoSuchMethodException, InstantiationException,
294                            IllegalAccessException, InvocationTargetException, IAAException {
295                    Constructor constructor = matchClass.getConstructor();
296                    Matcher matcher = (Matcher) constructor.newInstance();
297                    pairwiseIAA(matcher);
298            }
299    
300            public void pairwiseIAA(Matcher matcher) throws IAAException {
301                    for (Annotation annotation : annotations) {
302                            String setName = annotation.getSetName();
303                            for (String compareSetName : annotationSets.keySet()) {
304                                    if (!setName.equals(compareSetName)) {
305                                            Set<Annotation> matchedAnnotations = pairwiseMatches.get(setName).get(compareSetName);
306                                            if (matchedAnnotations.contains(annotation))
307                                                    continue;
308    
309                                            Set<Annotation> excludeAnnotations = pairwiseMatches.get(compareSetName).get(setName);
310                                            MatchResult matchResult = new MatchResult();
311                                            Annotation match = matcher.match(annotation, compareSetName, excludeAnnotations, this, matchResult);
312                                            if (match != null) {
313                                                    pairwiseMatches.get(setName).get(compareSetName).add(annotation);
314                                                    pairwiseMatches.get(compareSetName).get(setName).add(match);
315    
316                                                    if (!pairwiseMatchPairs.containsKey(annotation))
317                                                            pairwiseMatchPairs.put(annotation, new HashSet<Annotation>());
318                                                    if (!pairwiseMatchPairs.containsKey(match))
319                                                            pairwiseMatchPairs.put(match, new HashSet<Annotation>());
320                                                    pairwiseMatchPairs.get(annotation).add(match);
321                                                    pairwiseMatchPairs.get(match).add(annotation);
322    
323                                                    if (matchResult.getResult() == MatchResult.NONTRIVIAL_MATCH) {
324                                                            nontrivialPairwiseMatches.get(setName).get(compareSetName).add(annotation);
325                                                            nontrivialPairwiseMatches.get(compareSetName).get(setName).add(match);
326                                                    } else if (matchResult.getResult() == MatchResult.TRIVIAL_MATCH) {
327                                                            trivialPairwiseMatches.get(setName).get(compareSetName).add(annotation);
328                                                            trivialPairwiseMatches.get(compareSetName).get(setName).add(match);
329                                                    } else {
330                                                            throw new IAAException(
331                                                                            "match algorithm did not return null but the match result was not NONTRIVIAL_MATCH or TRIVIAL_MATCH");
332                                                    }
333                                            } else {
334                                                    pairwiseNonmatches.get(setName).get(compareSetName).add(annotation);
335                                                    if (matchResult.getResult() == MatchResult.NONTRIVIAL_NONMATCH)
336                                                            nontrivialPairwiseNonmatches.get(setName).get(compareSetName).add(annotation);
337                                                    else if (matchResult.getResult() == MatchResult.TRIVIAL_NONMATCH)
338                                                            trivialPairwiseNonmatches.get(setName).get(compareSetName).add(annotation);
339                                                    else {
340                                                            throw new IAAException(
341                                                                            "match algorithm returned null be the match result was not NONTRIVIAL_NONMATCH or TRIVIAL_NONMATCH");
342                                                    }
343                                            }
344                                    }
345                            }
346                    }
347            }
348    
349            public Set<Annotation> match(Annotation annotation, Set<Annotation> excludeAnnotations, Matcher matcher,
350                            MatchResult matchResult) {
351                    String setName = annotation.getSetName();
352                    Set<Annotation> matchedAnnotations = new HashSet<Annotation>();
353    
354                    // trivial matches trump non-trivial matches. If there is a single
355                    // trivial match, then trivial_match is the match result.
356                    boolean trivialMatch = false;
357                    // nontrivial nonmatches trump trivial nonmatches. If there is a single
358                    // nontrivial match, then nontrivial_nonmatch is the match result.
359                    boolean nontrivialNonmatch = false;
360    
361                    for (String compareSetName : annotationSets.keySet()) {
362                            if (!setName.equals(compareSetName)) {
363                                    MatchResult result = new MatchResult();
364                                    Annotation match = matcher.match(annotation, compareSetName, excludeAnnotations, this, result);
365                                    if (match != null) {
366                                            matchedAnnotations.add(match);
367                                            if (result.getResult() == MatchResult.TRIVIAL_MATCH) {
368                                                    trivialMatch = true;
369                                            }
370                                    } else if (result.getResult() == MatchResult.NONTRIVIAL_NONMATCH) {
371                                            nontrivialNonmatch = true;
372                                    }
373                            }
374                    }
375                    if (matchedAnnotations.size() == annotationSets.keySet().size() - 1) {
376                            if (trivialMatch)
377                                    matchResult.setResult(MatchResult.TRIVIAL_MATCH);
378                            else
379                                    matchResult.setResult(MatchResult.NONTRIVIAL_MATCH);
380                            return matchedAnnotations;
381                    } else {
382                            if (nontrivialNonmatch)
383                                    matchResult.setResult(MatchResult.NONTRIVIAL_NONMATCH);
384                            else
385                                    matchResult.setResult(MatchResult.TRIVIAL_NONMATCH);
386                            return null;
387                    }
388            }
389    
390            public Set<Annotation> getAnnotationsOfSameType(Annotation annotation, String compareSetName) {
391                    String annotationClass = annotation.getAnnotationClass();
392                    return safeReturn(class2AnnotationsMap.get(compareSetName).get(annotationClass));
393            }
394    
395            public Set<Annotation> getAnnotationsOfClass(String className, String compareSetName) {
396                    if (class2AnnotationsMap.containsKey(compareSetName)
397                                    && class2AnnotationsMap.get(compareSetName).containsKey(className)) {
398                            return class2AnnotationsMap.get(compareSetName).get(className);
399                    } else
400                            return Collections.emptySet();
401            }
402    
403            public Set<Annotation> getOverlappingAnnotations(Annotation annotation, String compareSetName) {
404                    AnnotationSpanIndex spanIndex = spanIndexes.get(compareSetName);
405                    return safeReturn(spanIndex.getOverlappingAnnotations(annotation));
406            }
407    
408            public Set<Annotation> getExactlyOverlappingAnnotations(Annotation annotation, String compareSetName) {
409                    AnnotationSpanIndex spanIndex = spanIndexes.get(compareSetName);
410                    return safeReturn(spanIndex.getExactlyOverlappingAnnotations(annotation));
411            }
412    
413            private Set<Annotation> safeReturn(Set<Annotation> returnValues) {
414                    if (returnValues == null)
415                            return emptyAnnotationSet;
416                    return returnValues;
417                    // return Collections.unmodifiableSet(returnValues);
418            }
419    
420            public Map<String, Set<Annotation>> getAllwayMatches() {
421                    return allwayMatches;
422            }
423    
424            public Map<String, Set<Annotation>> getAllwayNonmatches() {
425                    return allwayNonmatches;
426            }
427    
428            public Map<String, Map<String, Set<Annotation>>> getPairwiseMatches() {
429                    return pairwiseMatches;
430            }
431    
432            public Map<String, Map<String, Set<Annotation>>> getPairwiseNonmatches() {
433                    return pairwiseNonmatches;
434            }
435    
436            public Map<String, Set<Annotation>> getAnnotationSets() {
437                    return annotationSets;
438            }
439    
440            public Set<String> getSetNames() {
441                    return setNames;
442            }
443    
444            public Set<String> getAnnotationClasses() {
445                    return annotationClasses;
446            }
447    
448            public void setMatcherInfo(String infoLabel, Object infoObject) {
449                    matcherInfo.put(infoLabel, infoObject);
450            }
451    
452            public Object getMatcherInfo(String infoLabel) {
453                    return matcherInfo.get(infoLabel);
454            }
455    
456            public Map<String, Set<Annotation>> getTrivialAllwayMatches() {
457                    return trivialAllwayMatches;
458            }
459    
460            public Map<String, Set<Annotation>> getTrivialAllwayNonmatches() {
461                    return trivialAllwayNonmatches;
462            }
463    
464            public Map<String, Map<String, Set<Annotation>>> getTrivialPairwiseMatches() {
465                    return trivialPairwiseMatches;
466            }
467    
468            public Map<String, Map<String, Set<Annotation>>> getTrivialPairwiseNonmatches() {
469                    return trivialPairwiseNonmatches;
470            }
471    
472            public Map<String, Set<Annotation>> getNontrivialAllwayMatches() {
473                    return nontrivialAllwayMatches;
474            }
475    
476            public Map<String, Set<Annotation>> getNontrivialAllwayNonmatches() {
477                    return nontrivialAllwayNonmatches;
478            }
479    
480            public Map<String, Map<String, Set<Annotation>>> getNontrivialPairwiseMatches() {
481                    return nontrivialPairwiseMatches;
482            }
483    
484            public Map<String, Map<String, Set<Annotation>>> getNontrivialPairwiseNonmatches() {
485                    return nontrivialPairwiseNonmatches;
486            }
487    
488            public Map<Annotation, Set<Annotation>> getAllwayMatchSets() {
489                    return allwayMatchSets;
490            }
491    
492            public Map<Annotation, Set<Annotation>> getPairwiseMatchPairs() {
493                    return pairwiseMatchPairs;
494            }
495    
496    }