001 /*
002 * The contents of this file are subject to the Mozilla Public
003 * License Version 1.1 (the "License"); you may not use this file
004 * except in compliance with the License. You may obtain a copy of
005 * the License at http://www.mozilla.org/MPL/
006 *
007 * Software distributed under the License is distributed on an "AS
008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009 * implied. See the License for the specific language governing
010 * rights and limitations under the License.
011 *
012 * The Original Code is Knowtator.
013 *
014 * The Initial Developer of the Original Code is University of Colorado.
015 * Copyright (C) 2005 - 2008. All Rights Reserved.
016 *
017 * Knowtator was developed by the Center for Computational Pharmacology
018 * (http://compbio.uchcs.edu) at the University of Colorado Health
019 * Sciences Center School of Medicine with support from the National
020 * Library of Medicine.
021 *
022 * Current information about Knowtator can be obtained at
023 * http://knowtator.sourceforge.net/
024 *
025 * Contributor(s):
026 * Philip V. Ogren <philip@ogren.info> (Original Author)
027 */
028
029 package edu.uchsc.ccp.iaa.html;
030
031 import java.io.File;
032 import java.io.PrintStream;
033 import java.text.NumberFormat;
034 import java.util.ArrayList;
035 import java.util.Collections;
036 import java.util.HashMap;
037 import java.util.Map;
038 import java.util.Set;
039
040 import edu.uchsc.ccp.iaa.Annotation;
041 import edu.uchsc.ccp.iaa.AnnotationSpanIndex;
042 import edu.uchsc.ccp.iaa.IAA;
043 import edu.uchsc.ccp.iaa.matcher.Matcher;
044
045 public class SpanMatcherHTML {
046
047 public static void printIAA(IAA iaa, Matcher matcher, File directory, int numberOfDocs,
048 Map<Annotation, String> annotationTexts, Map<Annotation, String> annotationTextNames) throws Exception {
049 NumberFormat percentageFormat = NumberFormat.getPercentInstance();
050 percentageFormat.setMinimumFractionDigits(2);
051
052 String fileName = matcher.getName();
053
054 PrintStream html = new PrintStream(new File(directory, fileName + ".html"));
055
056 html.println(IAA2HTML.initHTML(matcher.getName(), matcher.getDescription()));
057 html.println("<h2>" + iaa.getSetNames().size() + "-way IAA Results</h2>");
058
059 html.println("<table border=1><tr>" + "<td><b>Type</b></td>" + "<td><b>IAA</b></td>"
060 + "<td><b>matches</b></td>" + "<td><b>non-matches</b></td>" + "<td><b>confused class assignments</tr>");
061
062 Set<String> classes = iaa.getAnnotationClasses();
063 Set<String> sets = iaa.getSetNames();
064
065 Map<String, Set<Annotation>> allwayMatches = iaa.getAllwayMatches();
066 Map<String, Set<Annotation>> allwayNonmatches = iaa.getAllwayNonmatches();
067
068 Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets();
069
070 Set<Annotation> allwayMatchesSingleSet = IAA2HTML.getSingleSet(allwayMatches);
071 Set<Annotation> allwayNonmatchesSingleSet = IAA2HTML.getSingleSet(allwayNonmatches);
072
073 AnnotationSpanIndex spanIndex = new AnnotationSpanIndex(allwayNonmatchesSingleSet);
074
075 int totalAllwayMatches = allwayMatchesSingleSet.size();
076 int totalAllwayNonmatches = allwayNonmatchesSingleSet.size();
077
078 double iaaScore = (double) totalAllwayMatches / ((double) totalAllwayMatches + (double) totalAllwayNonmatches);
079
080 html.println("<tr><td><b>All classes</b></td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>" + "<td>"
081 + totalAllwayMatches + "</td>" + "<td>" + totalAllwayNonmatches + "</td></tr>");
082
083 Map<String, Set<Annotation>> sortedAllwayMatches = IAA2HTML.sortByType(classes, allwayMatchesSingleSet);
084 Map<String, Set<Annotation>> sortedAllwayNonmatches = IAA2HTML.sortByType(classes, allwayNonmatchesSingleSet);
085
086 java.util.List<String> sortedTypes = new ArrayList<String>(classes);
087 Collections.sort(sortedTypes);
088
089 for (String type : sortedTypes) {
090 int classMatches = sortedAllwayMatches.get(type).size();
091 int classNonmatches = sortedAllwayNonmatches.get(type).size();
092
093 iaaScore = (double) classMatches / ((double) classMatches + (double) classNonmatches);
094
095 html.println("<tr><td>" + type + "</td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>" + "<td>"
096 + classMatches + "</td>" + "<td>" + classNonmatches + "</td>");
097 Map<String, int[]> confusionCounts = errorMatrix(sortedAllwayMatches.get(type), matchSets);
098 html.println("<td>");
099 for (String confusedClass : confusionCounts.keySet()) {
100 html.println(" " + confusedClass + "=" + confusionCounts.get(confusedClass)[0]);
101 }
102 html.println("</td>");
103 }
104 html.println("</table>");
105
106 html.println("<br>IAA calculated on " + numberOfDocs + " documents.");
107 html.println("<br>all annotations = matches + non-matches");
108 html.println("<br>IAA = matches / all annotations");
109
110 IAA2HTML.printMatchData(html, sets, fileName, directory, allwayMatches, annotationTexts, annotationTextNames,
111 classes, iaa);
112
113 IAA2HTML.printNonmatchData(html, sets, fileName, directory, allwayNonmatches, spanIndex, annotationTexts,
114 annotationTextNames, classes, iaa);
115
116 Map<String, Map<String, Set<Annotation>>> pairwiseMatches = iaa.getPairwiseMatches();
117 Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches = iaa.getPairwiseNonmatches();
118
119 IAA2HTML.printPairwiseAgreement(html, sets, pairwiseMatches, pairwiseNonmatches, percentageFormat);
120
121 html.flush();
122 html.close();
123 }
124
125 public static Map<String, int[]> errorMatrix(Set<Annotation> matches, Map<Annotation, Set<Annotation>> matchSets) {
126 Map<String, int[]> counts = new HashMap<String, int[]>();
127
128 for (Annotation match : matches) {
129 Set<Annotation> matchedAnnotations = matchSets.get(match);
130 for (Annotation matchedAnnotation : matchedAnnotations) {
131 if (!matchedAnnotation.equals(match)) {
132 String annotationClass = matchedAnnotation.getAnnotationClass();
133 if (!counts.containsKey(annotationClass)) {
134 counts.put(annotationClass, new int[1]);
135 }
136 counts.get(annotationClass)[0]++;
137 }
138 }
139 }
140 return counts;
141 }
142
143 }