001 /* 002 * The contents of this file are subject to the Mozilla Public 003 * License Version 1.1 (the "License"); you may not use this file 004 * except in compliance with the License. You may obtain a copy of 005 * the License at http://www.mozilla.org/MPL/ 006 * 007 * Software distributed under the License is distributed on an "AS 008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 009 * implied. See the License for the specific language governing 010 * rights and limitations under the License. 011 * 012 * The Original Code is Knowtator. 013 * 014 * The Initial Developer of the Original Code is University of Colorado. 015 * Copyright (C) 2005 - 2008. All Rights Reserved. 016 * 017 * Knowtator was developed by the Center for Computational Pharmacology 018 * (http://compbio.uchcs.edu) at the University of Colorado Health 019 * Sciences Center School of Medicine with support from the National 020 * Library of Medicine. 021 * 022 * Current information about Knowtator can be obtained at 023 * http://knowtator.sourceforge.net/ 024 * 025 * Contributor(s): 026 * Philip V. 
Ogren <philip@ogren.info> (Original Author) 027 */ 028 029 package edu.uchsc.ccp.iaa.html; 030 031 import java.io.File; 032 import java.io.IOException; 033 import java.io.PrintStream; 034 import java.text.NumberFormat; 035 import java.util.ArrayList; 036 import java.util.Collection; 037 import java.util.Collections; 038 import java.util.HashMap; 039 import java.util.HashSet; 040 import java.util.List; 041 import java.util.Map; 042 import java.util.Set; 043 044 import edu.uchsc.ccp.iaa.Annotation; 045 import edu.uchsc.ccp.iaa.AnnotationSpanIndex; 046 import edu.uchsc.ccp.iaa.IAA; 047 import edu.uchsc.ccp.iaa.Span; 048 import edu.uchsc.ccp.iaa.matcher.Matcher; 049 050 public class IAA2HTML { 051 052 public static void printIAA(IAA iaa, Matcher matcher, File directory, int numberOfDocs, 053 Map<Annotation, String> annotationTexts, Map<Annotation, String> annotationTextNames) throws Exception { 054 NumberFormat percentageFormat = NumberFormat.getPercentInstance(); 055 percentageFormat.setMinimumFractionDigits(2); 056 057 String fileName = matcher.getName(); 058 059 PrintStream tabular = new PrintStream(new File(directory, fileName + ".dat")); 060 PrintStream html = new PrintStream(new File(directory, fileName + ".html")); 061 062 initHTML(html, matcher.getName(), null, null, matcher.getDescription()); 063 064 printIntro(html, iaa, numberOfDocs, fileName, matcher); 065 html.println("<p>"); 066 printTitleRowForAllwayIAA(html, matcher); 067 068 tabular 069 .println("This file is provided to facilitate cut-n-paste into a spreadsheet.\n" 070 + "If you cannot directly copy the data below into a spreadsheet without it all going into a single cell,\n" 071 + "then try copying to a text editor first and then copy it from there. There is typically a 'paste special'\n" 072 + "option under the Edit menu that will allow you to paste the copied data as text. 
This will also work.\n\n\n"); 073 074 if (matcher.returnsTrivials()) 075 tabular 076 .println("type\tmatches\ttrivial matches\tnon-trivial matches\tnon-matches\ttrivial non-matches\tnon-trivial non-matches"); 077 else 078 tabular.println("type\tmatches\tnon-matches"); 079 080 Set<String> classes = iaa.getAnnotationClasses(); 081 Set<String> sets = iaa.getSetNames(); 082 083 Map<String, Set<Annotation>> allwayMatches = iaa.getAllwayMatches(); 084 Map<String, Set<Annotation>> nontrivialAllwayMatches = iaa.getNontrivialAllwayMatches(); 085 Map<String, Set<Annotation>> trivialAllwayMatches = iaa.getTrivialAllwayMatches(); 086 Map<String, Set<Annotation>> allwayNonmatches = iaa.getAllwayNonmatches(); 087 Map<String, Set<Annotation>> nontrivialAllwayNonmatches = iaa.getNontrivialAllwayNonmatches(); 088 Map<String, Set<Annotation>> trivialAllwayNonmatches = iaa.getTrivialAllwayNonmatches(); 089 090 Set<Annotation> allwayMatchesSingleSet = getSingleSet(allwayMatches); 091 Set<Annotation> trivialAllwayMatchesSingleSet = getSingleSet(trivialAllwayMatches); 092 Set<Annotation> nontrivialAllwayMatchesSingleSet = getSingleSet(nontrivialAllwayMatches); 093 Set<Annotation> allwayNonmatchesSingleSet = getSingleSet(allwayNonmatches); 094 Set<Annotation> trivialAllwayNonmatchesSingleSet = getSingleSet(trivialAllwayNonmatches); 095 Set<Annotation> nontrivialAllwayNonmatchesSingleSet = getSingleSet(nontrivialAllwayNonmatches); 096 097 AnnotationSpanIndex spanIndex = new AnnotationSpanIndex(allwayNonmatchesSingleSet); 098 099 int totalAllwayMatches = allwayMatchesSingleSet.size(); 100 int totalTrivialAllwayMatches = trivialAllwayMatchesSingleSet.size(); 101 int totalNontrivialAllwayMatches = nontrivialAllwayMatchesSingleSet.size(); 102 int totalAllwayNonmatches = allwayNonmatchesSingleSet.size(); 103 int totalTrivialAllwayNonmatches = trivialAllwayNonmatchesSingleSet.size(); 104 int totalNontrivialAllwayNonmatches = nontrivialAllwayNonmatchesSingleSet.size(); 105 106 double iaaScore = 
(double) totalAllwayMatches / ((double) totalAllwayMatches + (double) totalAllwayNonmatches); 107 double stingyIAAScore = (double) totalNontrivialAllwayMatches 108 / ((double) totalAllwayMatches + (double) totalAllwayNonmatches); 109 double respectableIAAScore = (double) totalNontrivialAllwayMatches 110 / ((double) totalNontrivialAllwayMatches + (double) totalAllwayNonmatches); 111 double nontrivialIAAScore = (double) totalNontrivialAllwayMatches 112 / ((double) totalNontrivialAllwayMatches + (double) totalNontrivialAllwayNonmatches); 113 114 if (matcher.returnsTrivials()) { 115 html.println("<tr><td><b>All classes</b></td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>" 116 + "<td>" + percentageFormat.format(stingyIAAScore) + "</td>" + "<td>" 117 + percentageFormat.format(respectableIAAScore) + "</td>" + "<td>" 118 + percentageFormat.format(nontrivialIAAScore) + "</td>" + "<td>" + totalAllwayMatches + "</td>" 119 + "<td>" + totalTrivialAllwayMatches + "</td>" + "<td>" + totalNontrivialAllwayMatches + "</td>" 120 + "<td>" + totalAllwayNonmatches + "</td>" + "<td>" + totalTrivialAllwayNonmatches + "</td>" 121 + "<td>" + totalNontrivialAllwayNonmatches + "</td></tr>"); 122 tabular.println("All classes\t" + totalAllwayMatches + "\t" + totalTrivialAllwayMatches + "\t" 123 + totalNontrivialAllwayMatches + "\t" + totalAllwayNonmatches + "\t" + totalTrivialAllwayNonmatches 124 + "\t" + totalNontrivialAllwayNonmatches); 125 } else { 126 html.println("<tr><td><b>All classes</b></td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>" 127 + "<td>" + totalAllwayMatches + "</td>" + "<td>" + totalAllwayNonmatches + "</td></tr>"); 128 tabular.println("All classes\t" + totalAllwayMatches + "\t" + totalAllwayNonmatches); 129 } 130 131 Map<String, Set<Annotation>> sortedAllwayMatches = sortByType(classes, allwayMatchesSingleSet); 132 Map<String, Set<Annotation>> sortedAllwayTrivialMatches = sortByType(classes, trivialAllwayMatchesSingleSet); 133 Map<String, 
Set<Annotation>> sortedAllwayNontrivialMatches = sortByType(classes, 134 nontrivialAllwayMatchesSingleSet); 135 Map<String, Set<Annotation>> sortedAllwayNonmatches = sortByType(classes, allwayNonmatchesSingleSet); 136 Map<String, Set<Annotation>> sortedAllwayTrivialNonmatches = sortByType(classes, 137 trivialAllwayNonmatchesSingleSet); 138 Map<String, Set<Annotation>> sortedAllwayNontrivialNonmatches = sortByType(classes, 139 nontrivialAllwayNonmatchesSingleSet); 140 141 java.util.List<String> sortedTypes = new ArrayList<String>(classes); 142 Collections.sort(sortedTypes); 143 144 for (String type : sortedTypes) { 145 int classMatches = sortedAllwayMatches.get(type).size(); 146 int classTrivialMatches = sortedAllwayTrivialMatches.get(type).size(); 147 int classNontrivialMatches = sortedAllwayNontrivialMatches.get(type).size(); 148 int classNonmatches = sortedAllwayNonmatches.get(type).size(); 149 int classTrivialNonmatches = sortedAllwayTrivialNonmatches.get(type).size(); 150 int classNontrivialNonmatches = sortedAllwayNontrivialNonmatches.get(type).size(); 151 152 iaaScore = (double) classMatches / ((double) classMatches + (double) classNonmatches); 153 stingyIAAScore = (double) classNontrivialMatches / ((double) classMatches + (double) classNonmatches); 154 respectableIAAScore = (double) classNontrivialMatches 155 / ((double) classNontrivialMatches + (double) classNonmatches); 156 nontrivialIAAScore = (double) classNontrivialMatches 157 / ((double) classNontrivialMatches + (double) classNontrivialNonmatches); 158 159 if (matcher.returnsTrivials()) { 160 html.println("<tr><td>" + type + "</td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>" 161 + "<td>" + percentageFormat.format(stingyIAAScore) + "</td>" + "<td>" 162 + percentageFormat.format(respectableIAAScore) + "</td>" + "<td>" 163 + percentageFormat.format(nontrivialIAAScore) + "</td>" + "<td>" + classMatches + "</td>" 164 + "<td>" + classTrivialMatches + "</td>" + "<td>" + classNontrivialMatches + 
"</td>" + "<td>" 165 + classNonmatches + "</td>" + "<td>" + classTrivialNonmatches + "</td>" + "<td>" 166 + classNontrivialNonmatches + "</td></tr>"); 167 tabular.println(type + "\t" + classMatches + "\t" + classTrivialMatches + "\t" + classNontrivialMatches 168 + "\t" + classNonmatches + "\t" + classTrivialNonmatches + "\t" + classNontrivialNonmatches); 169 } else { 170 html.println("<tr><td>" + type + "</td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>" 171 + "<td>" + classMatches + "</td>" + "<td>" + classNonmatches + "</td></tr>"); 172 tabular.println(type + "\t" + classMatches + "\t" + classNonmatches); 173 } 174 } 175 html.println("</table>"); 176 177 printMatchData(html, sets, fileName, directory, allwayMatches, classes, spanIndex, sortedTypes, 178 annotationTexts, annotationTextNames, matcher, trivialAllwayMatches, nontrivialAllwayMatches, iaa); 179 180 printNonmatchData(html, sets, fileName, directory, allwayNonmatches, classes, spanIndex, sortedTypes, 181 annotationTexts, annotationTextNames, matcher, trivialAllwayNonmatches, nontrivialAllwayNonmatches); 182 183 Map<String, Map<String, Set<Annotation>>> pairwiseMatches = iaa.getPairwiseMatches(); 184 Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches = iaa.getPairwiseNonmatches(); 185 186 printPairwiseAgreement(html, sets, pairwiseMatches, pairwiseNonmatches, percentageFormat); 187 188 html.flush(); 189 html.close(); 190 tabular.flush(); 191 tabular.close(); 192 } 193 194 private static void printMatchData(PrintStream html, Set<String> sets, String fileName, File directory, 195 Map<String, Set<Annotation>> allwayMatches, Set<String> classes, AnnotationSpanIndex spanIndex, 196 List<String> sortedTypes, Map<Annotation, String> annotationTexts, 197 Map<Annotation, String> annotationTextNames, Matcher matcher, 198 Map<String, Set<Annotation>> trivialAllwayMatches, Map<String, Set<Annotation>> nontrivialAllwayMatches, 199 IAA iaa) throws Exception { 200 html.println("<h2>match data</h2>"); 
201 html.println("<ul>"); 202 203 Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets(); 204 205 for (String set : sets) { 206 String matchesFileName = fileName + ".matches." + set + ".html"; 207 html.println("<li><a href=\"" + matchesFileName + "\">matches for " + set + "</a></li>"); 208 PrintStream matchesStream = new PrintStream(new File(directory, matchesFileName)); 209 Set<Annotation> matches = allwayMatches.get(set); 210 Map<String, Set<Annotation>> sortedMatches = sortByType(classes, matches); 211 212 initHTML( 213 matchesStream, 214 "Matches for " + set, 215 fileName + ".html", 216 fileName, 217 "Each annotation that was considered a match is shown in the text that it was found in. The matching annotations from the other annotation sets are also shown."); 218 printInstances(matchesStream, sortedMatches, sortedTypes, annotationTexts, annotationTextNames, matchSets); 219 matchesStream.flush(); 220 matchesStream.close(); 221 222 if (matcher.returnsTrivials()) { 223 String trivialMatchesFileName = fileName + ".trivial.matches." + set + ".html"; 224 html.println("<li><a href=\"" + trivialMatchesFileName + "\">trivial matches for " + set + "</a></li>"); 225 PrintStream trivialMatchesStream = new PrintStream(new File(directory, trivialMatchesFileName)); 226 Set<Annotation> trivialMatches = trivialAllwayMatches.get(set); 227 Map<String, Set<Annotation>> sortedTrivialMatches = sortByType(classes, trivialMatches); 228 initHTML( 229 trivialMatchesStream, 230 "Trivial matches for " + set, 231 fileName + ".html", 232 fileName, 233 "Each annotation that was considered a trival match is shown in the text that it was found in. 
The matching annotations from the other annotation sets are also shown."); 234 printInstances(trivialMatchesStream, sortedTrivialMatches, sortedTypes, annotationTexts, 235 annotationTextNames, matchSets); 236 trivialMatchesStream.flush(); 237 trivialMatchesStream.close(); 238 239 String nontrivialMatchesFileName = fileName + ".nontrivial.matches." + set + ".html"; 240 html.println("<li><a href=\"" + nontrivialMatchesFileName + "\">non-trivial matches for " + set 241 + "</a></li>"); 242 PrintStream nontrivialMatchesStream = new PrintStream(new File(directory, nontrivialMatchesFileName)); 243 Set<Annotation> nontrivialMatches = nontrivialAllwayMatches.get(set); 244 Map<String, Set<Annotation>> sortedNontrivialMatches = sortByType(classes, nontrivialMatches); 245 initHTML( 246 nontrivialMatchesStream, 247 "non-trivial non-matches for " + set, 248 fileName + ".html", 249 fileName, 250 "Each annotation that was considered a non-trival match is shown in the text that it was found in. The matching from the other annotation sets are also shown."); 251 printInstances(nontrivialMatchesStream, sortedNontrivialMatches, sortedTypes, annotationTexts, 252 annotationTextNames, matchSets); 253 nontrivialMatchesStream.flush(); 254 nontrivialMatchesStream.close(); 255 } 256 } 257 html.println("</ul><hr>"); 258 259 } 260 261 private static void printNonmatchData(PrintStream html, Set<String> sets, String fileName, File directory, 262 Map<String, Set<Annotation>> allwayNonmatches, Set<String> classes, AnnotationSpanIndex spanIndex, 263 List<String> sortedTypes, Map<Annotation, String> annotationTexts, 264 Map<Annotation, String> annotationTextNames, Matcher matcher, 265 Map<String, Set<Annotation>> trivialAllwayNonmatches, 266 Map<String, Set<Annotation>> nontrivialAllwayNonmatches) throws Exception { 267 html.println("<h2>non-match data</h2>"); 268 html.println("<ul>"); 269 270 for (String set : sets) { 271 String errorsFileName = fileName + ".nonmatches." 
+ set + ".html"; 272 html.println("<li><a href=\"" + errorsFileName + "\">non-matches for " + set + "</a></li>"); 273 PrintStream errors = new PrintStream(new File(directory, errorsFileName)); 274 Set<Annotation> nonmatches = allwayNonmatches.get(set); 275 Map<String, Set<Annotation>> sortedNonmatches = sortByType(classes, nonmatches); 276 277 Map<Annotation, Set<Annotation>> comparisonAnnotations = new HashMap<Annotation, Set<Annotation>>(); 278 for (Annotation nonmatch : nonmatches) { 279 comparisonAnnotations.put(nonmatch, getCandidateAnnotations(nonmatch, spanIndex)); 280 } 281 282 initHTML( 283 errors, 284 "Non-matches for " + set, 285 fileName + ".html", 286 fileName, 287 "Each annotation that was considered a non-match is shown in the text that it was found in. Overlapping annotations from the other annotation sets are also shown."); 288 printInstances(errors, sortedNonmatches, sortedTypes, annotationTexts, annotationTextNames, 289 comparisonAnnotations); 290 errors.flush(); 291 errors.close(); 292 293 if (matcher.returnsTrivials()) { 294 String trivialNonMatchesFileName = fileName + ".trivial.nonmatches." + set + ".html"; 295 html.println("<li><a href=\"" + trivialNonMatchesFileName + "\">trivial non-matches for " + set 296 + "</a></li>"); 297 PrintStream trivialErrors = new PrintStream(new File(directory, trivialNonMatchesFileName)); 298 Set<Annotation> trivialNonmatches = trivialAllwayNonmatches.get(set); 299 Map<String, Set<Annotation>> sortedTrivialNonmatches = sortByType(classes, trivialNonmatches); 300 initHTML( 301 trivialErrors, 302 "Trivial non-matches for " + set, 303 fileName + ".html", 304 fileName, 305 "Each annotation that was considered a trival non-match is shown in the text that it was found in. 
Overlapping annotations from the other annotation sets are also shown."); 306 printInstances(trivialErrors, sortedTrivialNonmatches, sortedTypes, annotationTexts, 307 annotationTextNames, comparisonAnnotations); 308 trivialErrors.flush(); 309 trivialErrors.close(); 310 311 String nontrivialNonMatchesFileName = fileName + ".nontrivial.nonmatches." + set + ".html"; 312 html.println("<li><a href=\"" + nontrivialNonMatchesFileName + "\">non-trivial non-matches for " + set 313 + "</a></li>"); 314 PrintStream nontrivialErrors = new PrintStream(new File(directory, nontrivialNonMatchesFileName)); 315 Set<Annotation> nontrivialNonmatches = nontrivialAllwayNonmatches.get(set); 316 Map<String, Set<Annotation>> sortedNontrivialNonmatches = sortByType(classes, nontrivialNonmatches); 317 initHTML( 318 nontrivialErrors, 319 "non-trivial non-matches for " + set, 320 fileName + ".html", 321 fileName, 322 "Each annotation that was considered a non-trival non-match is shown in the text that it was found in. 
Overlapping annotations from the other annotation sets are also shown."); 323 printInstances(nontrivialErrors, sortedNontrivialNonmatches, sortedTypes, annotationTexts, 324 annotationTextNames, comparisonAnnotations); 325 nontrivialErrors.flush(); 326 nontrivialErrors.close(); 327 } 328 } 329 html.println("</ul><hr>"); 330 } 331 332 public static Set<Annotation> getCandidateAnnotations(Annotation annotation, AnnotationSpanIndex spanIndex) { 333 Set<Annotation> candidateAnnotations = new HashSet<Annotation>(); 334 String set = annotation.getSetName(); 335 String docID = annotation.getDocID(); 336 337 Set<Annotation> overlappingAnnotations = spanIndex.getOverlappingAnnotations(annotation); 338 for (Annotation overlappingAnnotation : overlappingAnnotations) { 339 String candidateAnnotationSet = overlappingAnnotation.getSetName(); 340 if (!candidateAnnotationSet.equals(set)) { 341 String candidateDocID = overlappingAnnotation.getDocID(); 342 if (candidateDocID.equals(docID)) { 343 candidateAnnotations.add(overlappingAnnotation); 344 } 345 } 346 } 347 return candidateAnnotations; 348 } 349 350 public static void printInstances(PrintStream out, Map<String, Set<Annotation>> sortedAnnotations, 351 java.util.List<String> sortedTypes, Map<Annotation, String> annotationTexts, 352 Map<Annotation, String> annotationTextNames, Map<Annotation, Set<Annotation>> comparisonAnnotations) { 353 for (String type : sortedTypes) { 354 out.println("<h2>" + type + "</h2>"); 355 Set<Annotation> typeAnnotations = sortedAnnotations.get(type); 356 for (Annotation annotation : typeAnnotations) { 357 writeAnnotationTextSourceHTML(out, annotation, annotationTexts.get(annotation), annotationTextNames 358 .get(annotation)); 359 out.println("<ul><li>"); 360 printAnnotationHTML(out, annotation, annotationTexts.get(annotation)); 361 362 Set<Annotation> comparisons = comparisonAnnotations.get(annotation); 363 if (comparisons != null) { 364 for (Annotation comparisonAnnotation : comparisons) { 365 if 
(!comparisonAnnotation.equals(annotation)) { 366 out.println("<li>"); 367 printAnnotationHTML(out, comparisonAnnotation, annotationTexts.get(comparisonAnnotation)); 368 } 369 } 370 } 371 out.println("</ul>"); 372 } 373 } 374 } 375 376 public static Set<Annotation> getSingleSet(Map<String, Set<Annotation>> annotations) { 377 Set<Annotation> returnValues = new HashSet<Annotation>(); 378 for (String setName : annotations.keySet()) { 379 returnValues.addAll(annotations.get(setName)); 380 } 381 return returnValues; 382 } 383 384 public static void initHTML(PrintStream html, String title, String link, String linkLabel, String description) { 385 html.println("<html>"); 386 html.println("<head><title>" + title + "</title></head>"); 387 html.println("<body>"); 388 if (link != null) 389 html.println("<a href=\"" + link + "\">" + linkLabel + "</a>"); 390 html.println("<h1>" + title + "</h1>"); 391 html.println(description); 392 html.println("<hr>"); 393 } 394 395 public static Map<String, Set<Annotation>> sortByType(Set<String> types, Collection<Annotation> annotations) { 396 Map<String, Set<Annotation>> sortedAnnotations = new HashMap<String, Set<Annotation>>(); 397 398 for (String type : types) { 399 sortedAnnotations.put(type, new HashSet<Annotation>()); 400 } 401 for (Annotation annotation : annotations) { 402 String type = annotation.getAnnotationClass(); 403 if (type != null) 404 sortedAnnotations.get(type).add(annotation); 405 } 406 return sortedAnnotations; 407 } 408 409 static void writeAnnotationTextSourceHTML(PrintStream out, Annotation annotation, String annotationText, 410 String annotationTextName) { 411 StringBuffer html = new StringBuffer("<hr><p>"); 412 if (annotationTextName != null) 413 html.append("Text source name = " + annotationTextName + "<p>"); 414 415 if (annotationText != null) { 416 java.util.List<Span> spans = annotation.getSpans(); 417 java.util.List<Span> modifiedSpans = new ArrayList<Span>(spans); 418 419 if (spans == null || spans.size() == 0) 
{ 420 // spans should be the recursive spans 421 } 422 annotationText = shortenText(annotationText, modifiedSpans); 423 424 int mark = 0; 425 426 for (Span span : modifiedSpans) { 427 try { 428 html.append(annotationText.substring(mark, span.getStart()) + "<b>"); 429 html.append(Span.substring(annotationText, span) + "</b>"); 430 mark = span.getEnd(); 431 } catch (StringIndexOutOfBoundsException sioobe) { 432 sioobe.printStackTrace(); 433 System.out.println("annotationText=" + annotationText); 434 System.out.println("annotation = " + annotation.getSpans().get(0)); 435 System.out.println("annotationTextName = " + annotationTextName); 436 437 } 438 439 } 440 if (mark < annotationText.length()) 441 html.append(annotationText.substring(mark)); 442 } 443 out.println(html.toString()); 444 } 445 446 private static String shortenText(String text, java.util.List<Span> spans) { 447 int frontBuffer = 150; 448 int endBuffer = 150; 449 if (spans.size() > 0) { 450 Span span = spans.get(0); 451 int start = Math.max(0, span.getStart() - frontBuffer); 452 int end = Math.min(text.length(), span.getEnd() + endBuffer); 453 String substring = text.substring(start, end); 454 455 for (int i = 0; i < spans.size(); i++) { 456 span = spans.get(i); 457 Span offsetSpan = new Span(span.getStart() - start, span.getEnd() - start); 458 spans.set(i, offsetSpan); 459 } 460 return substring; 461 } 462 return text; 463 } 464 465 static void printAnnotationHTML(PrintStream out, Annotation annotation, String annotationText) { 466 StringBuffer html = new StringBuffer(); 467 468 if (annotationText != null) { 469 String coveredText = Annotation.getCoveredText(annotation, annotationText, " ... 
"); 470 html.append(coveredText); 471 } 472 html.append(" " + annotation.toHTML()); 473 out.print(html.toString()); 474 } 475 476 static void printIntro(PrintStream html, IAA iaa, int numberOfDocs, String fileName, Matcher matcher) { 477 html 478 .println("<p>For more detailed documentation on IAA please see the <a href=\"http://knowtator.sourceforge.net//iaa.shtml\">" 479 + "IAA documentation</a>."); 480 481 html.println("<p>"); 482 html.println("<h2>" + iaa.getSetNames().size() + "-way IAA Results</h2>"); 483 html.println("IAA calculated on " + numberOfDocs + " documents."); 484 html.println("<p><a href=\"" + fileName + ".dat\">tabular data</a>"); 485 html.println("<p>all annotations = matches + non-matches"); 486 html.println("<br> IAA = matches / all annotations"); 487 if (matcher.returnsTrivials()) { 488 html.println("<br>stingy IAA = non-trivial matches / (matches + non-matches)"); 489 html.println("<br>respectable IAA = non-trivial matches / (non-trivial matches + non-matches)"); 490 html.println("<br>non-trivial IAA = non-trivial matches / (non-trivial matches + non-trivial non-matches)"); 491 } 492 493 } 494 495 static void printTitleRowForAllwayIAA(PrintStream html, Matcher matcher) { 496 html.println("<table border=1><tr><td><b>Type</b></td>" + "<td><b>IAA</b></td>"); 497 if (matcher.returnsTrivials()) { 498 html.println("<td><b>stingy IAA</b></td>" + "<td><b>respectable IAA</b></td>" 499 + "<td><b>non-trivial IAA</b></td>"); 500 } 501 html.println("<td><b>matches</b></td>"); 502 if (matcher.returnsTrivials()) { 503 html.println("<td><b>trivial matches</b></td>" + "<td><b>non-trivial matches</b></td>"); 504 } 505 html.println("<td><b>non-matches</b></td>"); 506 if (matcher.returnsTrivials()) { 507 html.println("<td><b>trivial non-matches</b></td>" + "<td><b>non-trivial non-matches</b></td>"); 508 } 509 html.println("</tr>"); 510 } 511 512 public static String initHTML(String title, String description) { 513 StringBuffer html = new StringBuffer(); 514 
html.append("<html>\n"); 515 html.append("<head><title>" + title + "</title></head>\n"); 516 html.append("<body>\n"); 517 html.append("<h1>" + title + "</h1>\n"); 518 html.append(description); 519 html 520 .append(" For more detailed documentation on IAA please see the <a href=\"http://knowtator.sourceforge.net//iaa.shtml\">IAA documentation</a>.\n"); 521 return html.toString(); 522 } 523 524 public static void printMatchData(PrintStream html, Set<String> sets, String fileName, File directory, 525 Map<String, Set<Annotation>> allwayMatches, Map<Annotation, String> annotationTexts, 526 Map<Annotation, String> annotationTextNames, Set<String> classes, IAA iaa) throws IOException 527 528 { 529 html.println("<h2>match data</h2>"); 530 html.println("<ul>"); 531 532 Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets(); 533 java.util.List<String> sortedTypes = new ArrayList<String>(classes); 534 Collections.sort(sortedTypes); 535 536 for (String set : sets) { 537 String matchesFileName = fileName + ".matches." + set + ".html"; 538 html.println("<li><a href=\"" + matchesFileName + "\">matches for " + set + "</a></li>"); 539 PrintStream matchesStream = new PrintStream(new File(directory, matchesFileName)); 540 Set<Annotation> matches = allwayMatches.get(set); 541 Map<String, Set<Annotation>> sortedMatches = IAA2HTML.sortByType(classes, matches); 542 543 matchesStream 544 .println(initHTML( 545 "Matches for " + set, 546 "Each annotation that was considered a match is shown in the text that it was found in. 
The matching annotations from the other annotation sets are also shown.")); 547 IAA2HTML.printInstances(matchesStream, sortedMatches, sortedTypes, annotationTexts, annotationTextNames, 548 matchSets); 549 matchesStream.flush(); 550 matchesStream.close(); 551 } 552 html.println("</ul><hr>"); 553 } 554 555 public static void printNonmatchData(PrintStream html, Set<String> sets, String fileName, File directory, 556 Map<String, Set<Annotation>> allwayNonmatches, AnnotationSpanIndex spanIndex, 557 Map<Annotation, String> annotationTexts, Map<Annotation, String> annotationTextNames, Set<String> classes, 558 IAA iaa) throws IOException { 559 html.println("<h2>non-match data</h2>"); 560 html.println("<ul>"); 561 562 java.util.List<String> sortedTypes = new ArrayList<String>(classes); 563 Collections.sort(sortedTypes); 564 565 for (String set : sets) { 566 String errorsFileName = fileName + ".nonmatches." + set + ".html"; 567 html.println("<li><a href=\"" + errorsFileName + "\">non-matches for " + set + "</a></li>"); 568 PrintStream errors = new PrintStream(new File(directory, errorsFileName)); 569 Set<Annotation> nonmatches = allwayNonmatches.get(set); 570 Map<String, Set<Annotation>> sortedNonmatches = IAA2HTML.sortByType(classes, nonmatches); 571 572 Map<Annotation, Set<Annotation>> comparisonAnnotations = new HashMap<Annotation, Set<Annotation>>(); 573 for (Annotation nonmatch : nonmatches) { 574 comparisonAnnotations.put(nonmatch, IAA2HTML.getCandidateAnnotations(nonmatch, spanIndex)); 575 } 576 577 errors 578 .println(initHTML( 579 "Non-matches for " + set, 580 "Each annotation that was considered a non-match is shown in the text that it was found in. 
Overlapping annotations from the other annotation sets are also shown.")); 581 IAA2HTML.printInstances(errors, sortedNonmatches, sortedTypes, annotationTexts, annotationTextNames, 582 comparisonAnnotations); 583 errors.flush(); 584 errors.close(); 585 586 } 587 html.println("</ul><hr>"); 588 } 589 590 public static void printPairwiseAgreement(PrintStream html, Set<String> sets, 591 Map<String, Map<String, Set<Annotation>>> pairwiseMatches, 592 Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches, NumberFormat percentageFormat) { 593 html.println("<h2>Pair-wise agreement</h2>"); 594 html.println("<table border=1><tr><td><b>Gold standard set</b></td>" + "<td><b>compared set</b></td>" 595 + "<td><b>true positives</b></td>" + "<td><b>false positives</b></td>" 596 + "<td><b>false negatives</b></td>" + "<td><b>precision</b></td>" + "<td><b>recall</b></td>" 597 + "<td><b>F-score</b></td></tr>"); 598 599 for (String setName : sets) { 600 for (String setName2 : sets) { 601 602 if (!setName.equals(setName2)) { 603 Set<Annotation> truePositives = pairwiseMatches.get(setName).get(setName2); 604 Set<Annotation> falseNegatives = pairwiseNonmatches.get(setName).get(setName2); 605 Set<Annotation> falsePositives = pairwiseNonmatches.get(setName2).get(setName); 606 double precision = (double) truePositives.size() 607 / ((double) truePositives.size() + (double) falsePositives.size()); 608 double recall = (double) truePositives.size() 609 / ((double) truePositives.size() + (double) falseNegatives.size()); 610 double f_score = ((double) 2 * precision * recall) / (recall + precision); 611 612 html.println("<tr><td>" + setName + "</td>" + "<td>" + setName2 + "</td>" + "<td>" 613 + truePositives.size() + "</td>" + "<td>" + falsePositives.size() + "</td>" + "<td>" 614 + falseNegatives.size() + "</td>" + "<td>" + percentageFormat.format(precision) + "</td>" 615 + "<td>" + percentageFormat.format(recall) + "</td>" + "<td>" 616 + percentageFormat.format(f_score) + "</td></tr>"); 617 } 
618 } 619 } 620 html.println("</table>"); 621 html.println("Precision and recall are given equal weight for the F-score."); 622 } 623 624 } 625 626 // for(String type : sortedTypes) 627 // { 628 // errors.println("<h2>"+type+"</h2>"); 629 // Set<Annotation> typeNonmatches = sortedNonmatches.get(type); 630 // for(Annotation annotation : typeNonmatches) 631 // { 632 // String docID = annotation.getDocID(); 633 // writeAnnotationTextSourceHTML(errors, annotation, 634 // annotationTexts.get(annotation), annotationTextNames.get(annotation)); 635 // errors.println("<ul><li>"); 636 // printAnnotationHTML(errors, annotation, annotationTexts.get(annotation)); 637 // 638 // Set<Annotation> candidateAnnotations = 639 // spanIndex.getOverlappingAnnotations(annotation); 640 // for(Annotation candidateAnnotation : candidateAnnotations) 641 // { 642 // String candidateAnnotationSet = candidateAnnotation.getSetName(); 643 // if(!candidateAnnotationSet.equals(set)) 644 // { 645 // String candidateDocID = candidateAnnotation.getDocID(); 646 // if(candidateDocID.equals(docID)) 647 // { 648 // errors.println("<li>"); 649 // printAnnotationHTML(errors, candidateAnnotation, 650 // annotationTexts.get(candidateAnnotation)); 651 // } 652 // } 653 // } 654 // errors.println("</ul>"); 655 // } 656 // } 657 658 // for(String set : sets) 659 // { 660 // String matchesFileName = fileName+".matches."+set+".html"; 661 //html.println("<li><a href=\""+matchesFileName+"\">matches for "+set+"</a></li>" 662 // ); 663 // PrintStream matchesStream = new PrintStream(new File(directory, 664 // matchesFileName)); 665 // Set<Annotation> matches = allwayMatches.get(set); 666 // Map<String, Set<Annotation>> sortedMatches = sortByType(classes, matches); 667 // Map<Annotation, Set<Annotation>> matchSets = new HashMap<Annotation, 668 // Set<Annotation>>(); 669 // for(Annotation nonmatch : nonmatches) 670 // { 671 // comparisonAnnotations.put(nonmatch, getCandidateAnnotations(nonmatch, 672 // spanIndex)); 673 // 
} 674 // 675 // initHTML(errors, "Non-matches for "+set, fileName+".html", fileName, 676 // "Each annotation that was considered a non-match is shown in the text that it was found in. Overlapping annotations from the other annotation sets are also shown." 677 // ); 678 // printInstances(errors, sortedNonmatches, sortedTypes, annotationTexts, 679 // annotationTextNames, comparisonAnnotations); 680 // errors.flush(); errors.close(); 681 // } 682 683 // String matchesFileName = fileName+".matches.html"; 684 // String trivialMatchesFileName = fileName+".trivial.matches.html"; 685 // String nontrivialMatchesFileName = fileName+".nontrivial.matches.html"; 686 // PrintStream matchesStream = new PrintStream(new File(directory, 687 // matchesFileName)); 688 // PrintStream trivialMatchesStream = new PrintStream(new File(directory, 689 // trivialMatchesFileName)); 690 // PrintStream nontrivialMatchesStream = new PrintStream(new File(directory, 691 // nontrivialMatchesFileName)); 692 // 693 // html.println("<h2>match data</h2>"); 694 // html.println("<ul><li><a href=\""+matchesFileName+"\">Matches</a></li>"); 695 // if(matcher.returnsTrivials()) 696 // { html.println("<li><a href=\""+trivialMatchesFileName+ 697 // "\">Trivial matches</a></li>"); 698 // html.println("<li><a href=\""+nontrivialMatchesFileName+ 699 // "\">Non-trivial matches</a></li>"); 700 // } 701 // html.println("</ul>"); 702 // 703 // Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets(); 704 // initHTML(matchesStream, "Matches", fileName+".html", fileName, 705 // "Each annotation that was considered a match is shown in the text that it was found in. Annotations from each of the annotation sets are shown because there may be differences in the individual annotations if the match criteria ignored those differences. Only one of the annotation's spans is bolded in the text." 
706 // ); 707 // if(matcher.returnsTrivials()) 708 // { 709 // initHTML(trivialMatchesStream, "Trivial matches", fileName+".html", fileName, 710 // "Each annotation that was considered a match is shown in the text that it was found in. Annotations from each of the annotation sets are shown because there may be differences in the individual annotations if the match criteria ignored those differences. Only one of the annotation's spans is bolded in the text." 711 // ); 712 // initHTML(nontrivialMatchesStream, "Non-trivial matches", fileName+".html", 713 // fileName, 714 // "Each annotation that was considered a match is shown in the text that it was found in. Annotations from each of the annotation sets are shown because there may be differences in the individual annotations if the match criteria ignored those differences. Only one of the annotation's spans is bolded in the text." 715 // ); 716 // } 717 // 718 // Set<Annotation> printedAnnotations = new HashSet<Annotation>(); 719 // for(String type : sortedTypes) 720 // { 721 // matchesStream.println("<h2>"+type+"</h2>"); 722 // trivialMatchesStream.println("<h2>"+type+"</h2>"); 723 // nontrivialMatchesStream.println("<h2>"+type+"</h2>"); 724 // 725 // Set<Annotation> typeTrivialMatches = sortedAllwayTrivialMatches.get(type); 726 // Set<Annotation> typeNontrivialMatches = 727 // sortedAllwayNontrivialMatches.get(type); 728 // Set<Annotation> typeMatches = sortedAllwayMatches.get(type); 729 // 730 // for(Annotation annotation : typeMatches) 731 // { 732 // if(printedAnnotations.contains(annotation)) continue; 733 // Set<Annotation> matchSet = matchSets.get(annotation); 734 // 735 // writeAnnotationTextSourceHTML(matchesStream, annotation, 736 // annotationTexts.get(annotation), annotationTextNames.get(annotation)); 737 // matchesStream.println("<p>"); 738 // printAnnotationHTML(matchesStream, annotation, 739 // annotationTexts.get(annotation)); 740 // if(typeTrivialMatches.contains(annotation) && matcher.returnsTrivials()) 
741 // { 742 // writeAnnotationTextSourceHTML(trivialMatchesStream, annotation, 743 // annotationTexts.get(annotation), annotationTextNames.get(annotation)); 744 // trivialMatchesStream.println("<p>"); 745 // printAnnotationHTML(trivialMatchesStream, annotation, 746 // annotationTexts.get(annotation)); 747 // } 748 // else if(typeNontrivialMatches.contains(annotation) && 749 // matcher.returnsTrivials()) 750 // { 751 // writeAnnotationTextSourceHTML(nontrivialMatchesStream, annotation, 752 // annotationTexts.get(annotation), annotationTextNames.get(annotation)); 753 // nontrivialMatchesStream.println("<p>"); 754 // printAnnotationHTML(nontrivialMatchesStream, annotation, 755 // annotationTexts.get(annotation)); 756 // } 757 // 758 // printedAnnotations.add(annotation); 759 // for(Annotation matchedAnnotation : matchSet) 760 // { 761 // if(!matchedAnnotation.equals(annotation)) 762 // { 763 // printAnnotationHTML(matchesStream, matchedAnnotation, 764 // annotationTexts.get(matchedAnnotation)); 765 // if(typeTrivialMatches.contains(matchedAnnotation) && 766 // matcher.returnsTrivials()) 767 // { 768 // printAnnotationHTML(trivialMatchesStream, matchedAnnotation, 769 // annotationTexts.get(matchedAnnotation)); 770 // } 771 // else if(typeNontrivialMatches.contains(matchedAnnotation) && 772 // matcher.returnsTrivials()) 773 // { 774 // printAnnotationHTML(nontrivialMatchesStream, matchedAnnotation, 775 // annotationTexts.get(matchedAnnotation)); 776 // } 777 // printedAnnotations.add(matchedAnnotation); 778 // } 779 // } 780 // } 781 // } 782 // matchesStream.flush(); matchesStream.close(); 783 // trivialMatchesStream.flush(); trivialMatchesStream.close(); 784 // nontrivialMatchesStream.flush(); nontrivialMatchesStream.close(); 785 //