/*
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Knowtator.
 *
 * The Initial Developer of the Original Code is University of Colorado.
 * Copyright (C) 2005 - 2008. All Rights Reserved.
 *
 * Knowtator was developed by the Center for Computational Pharmacology
 * (http://compbio.uchcs.edu) at the University of Colorado Health
 * Sciences Center School of Medicine with support from the National
 * Library of Medicine.
 *
 * Current information about Knowtator can be obtained at
 * http://knowtator.sourceforge.net/
 *
 * Contributor(s):
 * Philip V. Ogren <philip@ogren.info> (Original Author)
 */

package edu.uchsc.ccp.iaa.html;

import java.io.File;
import java.io.PrintStream;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import edu.uchsc.ccp.iaa.Annotation;
import edu.uchsc.ccp.iaa.AnnotationSpanIndex;
import edu.uchsc.ccp.iaa.IAA;
import edu.uchsc.ccp.iaa.matcher.Matcher;

/**
 * Writes the HTML report for an inter-annotator agreement (IAA) run: a
 * summary table of all-way matches and non-matches per annotation class,
 * followed by the detail sections produced by {@link IAA2HTML}.
 */
public class SpanMatcherHTML {

	/**
	 * Writes the report file <code>&lt;matcher name&gt;.html</code> into
	 * <code>directory</code>.
	 * <p>
	 * The report contains an "All classes" row, one row per annotation class
	 * (in sorted class-name order) with the counts returned by
	 * {@link #errorMatrix(Set, Map)}, and then the match, non-match and
	 * pairwise agreement sections printed by {@link IAA2HTML}.
	 * <p>
	 * The output stream is always closed, including when one of the calls made
	 * while building the report throws.
	 *
	 * @param iaa
	 *            source of the set names, annotation classes and the all-way /
	 *            pairwise match data that is reported
	 * @param matcher
	 *            supplies the report name (also used as the file name) and
	 *            description
	 * @param directory
	 *            directory in which the report file is created
	 * @param numberOfDocs
	 *            document count printed in the report footer text
	 * @param annotationTexts
	 *            passed through unchanged to the {@link IAA2HTML} match and
	 *            non-match printers
	 * @param annotationTextNames
	 *            passed through unchanged to the {@link IAA2HTML} match and
	 *            non-match printers
	 * @throws Exception
	 *             if the report file cannot be opened or any of the delegated
	 *             calls fails
	 */
	public static void printIAA(IAA iaa, Matcher matcher, File directory, int numberOfDocs,
			Map<Annotation, String> annotationTexts, Map<Annotation, String> annotationTextNames) throws Exception {
		NumberFormat percentageFormat = NumberFormat.getPercentInstance();
		percentageFormat.setMinimumFractionDigits(2);

		String fileName = matcher.getName();

		PrintStream html = new PrintStream(new File(directory, fileName + ".html"));
		try {
			html.println(IAA2HTML.initHTML(matcher.getName(), matcher.getDescription()));
			html.println("<h2>" + iaa.getSetNames().size() + "-way IAA Results</h2>");

			// Every header cell, including the last one, must close its <b> and <td>.
			html.println("<table border=1><tr>" + "<td><b>Type</b></td>" + "<td><b>IAA</b></td>"
					+ "<td><b>matches</b></td>" + "<td><b>non-matches</b></td>"
					+ "<td><b>confused class assignments</b></td></tr>");

			Set<String> classes = iaa.getAnnotationClasses();
			Set<String> sets = iaa.getSetNames();

			Map<String, Set<Annotation>> allwayMatches = iaa.getAllwayMatches();
			Map<String, Set<Annotation>> allwayNonmatches = iaa.getAllwayNonmatches();

			Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets();

			Set<Annotation> allwayMatchesSingleSet = IAA2HTML.getSingleSet(allwayMatches);
			Set<Annotation> allwayNonmatchesSingleSet = IAA2HTML.getSingleSet(allwayNonmatches);

			AnnotationSpanIndex spanIndex = new AnnotationSpanIndex(allwayNonmatchesSingleSet);

			int totalAllwayMatches = allwayMatchesSingleSet.size();
			int totalAllwayNonmatches = allwayNonmatchesSingleSet.size();

			double iaaScore = agreement(totalAllwayMatches, totalAllwayNonmatches);

			html.println("<tr><td><b>All classes</b></td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
					+ "<td>" + totalAllwayMatches + "</td>" + "<td>" + totalAllwayNonmatches + "</td></tr>");

			Map<String, Set<Annotation>> sortedAllwayMatches = IAA2HTML.sortByType(classes, allwayMatchesSingleSet);
			Map<String, Set<Annotation>> sortedAllwayNonmatches = IAA2HTML.sortByType(classes,
					allwayNonmatchesSingleSet);

			java.util.List<String> sortedTypes = new ArrayList<String>(classes);
			Collections.sort(sortedTypes);

			for (String type : sortedTypes) {
				Set<Annotation> typeMatches = sortedAllwayMatches.get(type);
				int classMatches = typeMatches.size();
				int classNonmatches = sortedAllwayNonmatches.get(type).size();

				iaaScore = agreement(classMatches, classNonmatches);

				html.println("<tr><td>" + type + "</td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
						+ "<td>" + classMatches + "</td>" + "<td>" + classNonmatches + "</td>");

				Map<String, int[]> confusionCounts = errorMatrix(typeMatches, matchSets);

				// Sort the class names so the cell content does not depend on
				// hash-map iteration order (same approach as sortedTypes above).
				java.util.List<String> confusedClasses = new ArrayList<String>(confusionCounts.keySet());
				Collections.sort(confusedClasses);

				html.println("<td>");
				for (String confusedClass : confusedClasses) {
					html.println(" " + confusedClass + "=" + confusionCounts.get(confusedClass)[0]);
				}
				// Close the row as well as the cell.
				html.println("</td></tr>");
			}
			html.println("</table>");

			html.println("<br>IAA calculated on " + numberOfDocs + " documents.");
			html.println("<br>all annotations = matches + non-matches");
			html.println("<br>IAA = matches / all annotations");

			IAA2HTML.printMatchData(html, sets, fileName, directory, allwayMatches, annotationTexts,
					annotationTextNames, classes, iaa);

			IAA2HTML.printNonmatchData(html, sets, fileName, directory, allwayNonmatches, spanIndex, annotationTexts,
					annotationTextNames, classes, iaa);

			Map<String, Map<String, Set<Annotation>>> pairwiseMatches = iaa.getPairwiseMatches();
			Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches = iaa.getPairwiseNonmatches();

			IAA2HTML.printPairwiseAgreement(html, sets, pairwiseMatches, pairwiseNonmatches, percentageFormat);
		} finally {
			// PrintStream.close() flushes before closing, so no separate flush is needed.
			html.close();
		}
	}

	/**
	 * Computes matches / (matches + non-matches) in floating point.
	 * <p>
	 * When both counts are zero the result is NaN (0.0 / 0.0); it is passed to
	 * the percentage formatter as is.
	 */
	private static double agreement(int matches, int nonmatches) {
		return (double) matches / ((double) matches + (double) nonmatches);
	}

	/**
	 * Counts, per annotation class, the annotations that were matched with the
	 * given annotations.
	 * <p>
	 * For every annotation in <code>matches</code> its match set is looked up
	 * in <code>matchSets</code>; every member of that set that is not equal to
	 * the annotation itself adds one to the count of its own annotation class.
	 * The class of the counted annotation is not compared with the class of
	 * the annotation it was matched with. Annotations that have no entry in
	 * <code>matchSets</code> are skipped.
	 *
	 * @param matches
	 *            the annotations whose match sets are examined
	 * @param matchSets
	 *            maps an annotation to the set of annotations it was matched
	 *            with
	 * @return a map from annotation class name to a one-element array holding
	 *         the count for that class; classes with a count of zero are
	 *         absent
	 */
	public static Map<String, int[]> errorMatrix(Set<Annotation> matches, Map<Annotation, Set<Annotation>> matchSets) {
		Map<String, int[]> counts = new HashMap<String, int[]>();

		for (Annotation match : matches) {
			Set<Annotation> matchedAnnotations = matchSets.get(match);
			if (matchedAnnotations == null) {
				// No match set recorded for this annotation: nothing to count.
				continue;
			}
			for (Annotation matchedAnnotation : matchedAnnotations) {
				if (matchedAnnotation.equals(match)) {
					continue;
				}
				String annotationClass = matchedAnnotation.getAnnotationClass();
				int[] counter = counts.get(annotationClass);
				if (counter == null) {
					counter = new int[1];
					counts.put(annotationClass, counter);
				}
				counter[0]++;
			}
		}
		return counts;
	}

}