001    /*
002     * The contents of this file are subject to the Mozilla Public
003     * License Version 1.1 (the "License"); you may not use this file
004     * except in compliance with the License. You may obtain a copy of
005     * the License at http://www.mozilla.org/MPL/
006     *
007     * Software distributed under the License is distributed on an "AS
008     * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009     * implied. See the License for the specific language governing
010     * rights and limitations under the License.
011     *
012     * The Original Code is Knowtator.
013     *
014     * The Initial Developer of the Original Code is University of Colorado.  
015     * Copyright (C) 2005 - 2008.  All Rights Reserved.
016     *
017     * Knowtator was developed by the Center for Computational Pharmacology
018     * (http://compbio.uchcs.edu) at the University of Colorado Health 
019     *  Sciences Center School of Medicine with support from the National 
020     *  Library of Medicine.  
021     *
022     * Current information about Knowtator can be obtained at 
023     * http://knowtator.sourceforge.net/
024     *
025     * Contributor(s):
026     *   Philip V. Ogren <philip@ogren.info> (Original Author)
027     */
028    
029    package edu.uchsc.ccp.iaa.html;
030    
031    import java.io.File;
032    import java.io.PrintStream;
033    import java.text.NumberFormat;
034    import java.util.ArrayList;
035    import java.util.Collections;
036    import java.util.HashMap;
037    import java.util.Map;
038    import java.util.Set;
039    
040    import edu.uchsc.ccp.iaa.Annotation;
041    import edu.uchsc.ccp.iaa.AnnotationSpanIndex;
042    import edu.uchsc.ccp.iaa.IAA;
043    import edu.uchsc.ccp.iaa.matcher.Matcher;
044    
045    public class SpanMatcherHTML {
046    
047            public static void printIAA(IAA iaa, Matcher matcher, File directory, int numberOfDocs,
048                            Map<Annotation, String> annotationTexts, Map<Annotation, String> annotationTextNames) throws Exception {
049                    NumberFormat percentageFormat = NumberFormat.getPercentInstance();
050                    percentageFormat.setMinimumFractionDigits(2);
051    
052                    String fileName = matcher.getName();
053    
054                    PrintStream html = new PrintStream(new File(directory, fileName + ".html"));
055    
056                    html.println(IAA2HTML.initHTML(matcher.getName(), matcher.getDescription()));
057                    html.println("<h2>" + iaa.getSetNames().size() + "-way IAA Results</h2>");
058    
059                    html.println("<table border=1><tr>" + "<td><b>Type</b></td>" + "<td><b>IAA</b></td>"
060                                    + "<td><b>matches</b></td>" + "<td><b>non-matches</b></td>" + "<td><b>confused class assignments</tr>");
061    
062                    Set<String> classes = iaa.getAnnotationClasses();
063                    Set<String> sets = iaa.getSetNames();
064    
065                    Map<String, Set<Annotation>> allwayMatches = iaa.getAllwayMatches();
066                    Map<String, Set<Annotation>> allwayNonmatches = iaa.getAllwayNonmatches();
067    
068                    Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets();
069    
070                    Set<Annotation> allwayMatchesSingleSet = IAA2HTML.getSingleSet(allwayMatches);
071                    Set<Annotation> allwayNonmatchesSingleSet = IAA2HTML.getSingleSet(allwayNonmatches);
072    
073                    AnnotationSpanIndex spanIndex = new AnnotationSpanIndex(allwayNonmatchesSingleSet);
074    
075                    int totalAllwayMatches = allwayMatchesSingleSet.size();
076                    int totalAllwayNonmatches = allwayNonmatchesSingleSet.size();
077    
078                    double iaaScore = (double) totalAllwayMatches / ((double) totalAllwayMatches + (double) totalAllwayNonmatches);
079    
080                    html.println("<tr><td><b>All classes</b></td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>" + "<td>"
081                                    + totalAllwayMatches + "</td>" + "<td>" + totalAllwayNonmatches + "</td></tr>");
082    
083                    Map<String, Set<Annotation>> sortedAllwayMatches = IAA2HTML.sortByType(classes, allwayMatchesSingleSet);
084                    Map<String, Set<Annotation>> sortedAllwayNonmatches = IAA2HTML.sortByType(classes, allwayNonmatchesSingleSet);
085    
086                    java.util.List<String> sortedTypes = new ArrayList<String>(classes);
087                    Collections.sort(sortedTypes);
088    
089                    for (String type : sortedTypes) {
090                            int classMatches = sortedAllwayMatches.get(type).size();
091                            int classNonmatches = sortedAllwayNonmatches.get(type).size();
092    
093                            iaaScore = (double) classMatches / ((double) classMatches + (double) classNonmatches);
094    
095                            html.println("<tr><td>" + type + "</td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>" + "<td>"
096                                            + classMatches + "</td>" + "<td>" + classNonmatches + "</td>");
097                            Map<String, int[]> confusionCounts = errorMatrix(sortedAllwayMatches.get(type), matchSets);
098                            html.println("<td>");
099                            for (String confusedClass : confusionCounts.keySet()) {
100                                    html.println("  " + confusedClass + "=" + confusionCounts.get(confusedClass)[0]);
101                            }
102                            html.println("</td>");
103                    }
104                    html.println("</table>");
105    
106                    html.println("<br>IAA calculated on " + numberOfDocs + " documents.");
107                    html.println("<br>all annotations = matches + non-matches");
108                    html.println("<br>IAA = matches / all annotations");
109    
110                    IAA2HTML.printMatchData(html, sets, fileName, directory, allwayMatches, annotationTexts, annotationTextNames,
111                                    classes, iaa);
112    
113                    IAA2HTML.printNonmatchData(html, sets, fileName, directory, allwayNonmatches, spanIndex, annotationTexts,
114                                    annotationTextNames, classes, iaa);
115    
116                    Map<String, Map<String, Set<Annotation>>> pairwiseMatches = iaa.getPairwiseMatches();
117                    Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches = iaa.getPairwiseNonmatches();
118    
119                    IAA2HTML.printPairwiseAgreement(html, sets, pairwiseMatches, pairwiseNonmatches, percentageFormat);
120    
121                    html.flush();
122                    html.close();
123            }
124    
125            public static Map<String, int[]> errorMatrix(Set<Annotation> matches, Map<Annotation, Set<Annotation>> matchSets) {
126                    Map<String, int[]> counts = new HashMap<String, int[]>();
127    
128                    for (Annotation match : matches) {
129                            Set<Annotation> matchedAnnotations = matchSets.get(match);
130                            for (Annotation matchedAnnotation : matchedAnnotations) {
131                                    if (!matchedAnnotation.equals(match)) {
132                                            String annotationClass = matchedAnnotation.getAnnotationClass();
133                                            if (!counts.containsKey(annotationClass)) {
134                                                    counts.put(annotationClass, new int[1]);
135                                            }
136                                            counts.get(annotationClass)[0]++;
137                                    }
138                            }
139                    }
140                    return counts;
141            }
142    
143    }