001 /*
002 * The contents of this file are subject to the Mozilla Public
003 * License Version 1.1 (the "License"); you may not use this file
004 * except in compliance with the License. You may obtain a copy of
005 * the License at http://www.mozilla.org/MPL/
006 *
007 * Software distributed under the License is distributed on an "AS
008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009 * implied. See the License for the specific language governing
010 * rights and limitations under the License.
011 *
012 * The Original Code is Knowtator.
013 *
014 * The Initial Developer of the Original Code is University of Colorado.
015 * Copyright (C) 2005 - 2008. All Rights Reserved.
016 *
017 * Knowtator was developed by the Center for Computational Pharmacology
018 * (http://compbio.uchcs.edu) at the University of Colorado Health
019 * Sciences Center School of Medicine with support from the National
020 * Library of Medicine.
021 *
022 * Current information about Knowtator can be obtained at
023 * http://knowtator.sourceforge.net/
024 *
025 * Contributor(s):
026 * Philip V. Ogren <philip@ogren.info> (Original Author)
027 */
028
029 package edu.uchsc.ccp.iaa.html;
030
031 import java.io.File;
032 import java.io.IOException;
033 import java.io.PrintStream;
034 import java.text.NumberFormat;
035 import java.util.ArrayList;
036 import java.util.Collection;
037 import java.util.Collections;
038 import java.util.HashMap;
039 import java.util.HashSet;
040 import java.util.List;
041 import java.util.Map;
042 import java.util.Set;
043
044 import edu.uchsc.ccp.iaa.Annotation;
045 import edu.uchsc.ccp.iaa.AnnotationSpanIndex;
046 import edu.uchsc.ccp.iaa.IAA;
047 import edu.uchsc.ccp.iaa.Span;
048 import edu.uchsc.ccp.iaa.matcher.Matcher;
049
050 public class IAA2HTML {
051
052 public static void printIAA(IAA iaa, Matcher matcher, File directory, int numberOfDocs,
053 Map<Annotation, String> annotationTexts, Map<Annotation, String> annotationTextNames) throws Exception {
054 NumberFormat percentageFormat = NumberFormat.getPercentInstance();
055 percentageFormat.setMinimumFractionDigits(2);
056
057 String fileName = matcher.getName();
058
059 PrintStream tabular = new PrintStream(new File(directory, fileName + ".dat"));
060 PrintStream html = new PrintStream(new File(directory, fileName + ".html"));
061
062 initHTML(html, matcher.getName(), null, null, matcher.getDescription());
063
064 printIntro(html, iaa, numberOfDocs, fileName, matcher);
065 html.println("<p>");
066 printTitleRowForAllwayIAA(html, matcher);
067
068 tabular
069 .println("This file is provided to facilitate cut-n-paste into a spreadsheet.\n"
070 + "If you cannot directly copy the data below into a spreadsheet without it all going into a single cell,\n"
071 + "then try copying to a text editor first and then copy it from there. There is typically a 'paste special'\n"
072 + "option under the Edit menu that will allow you to paste the copied data as text. This will also work.\n\n\n");
073
074 if (matcher.returnsTrivials())
075 tabular
076 .println("type\tmatches\ttrivial matches\tnon-trivial matches\tnon-matches\ttrivial non-matches\tnon-trivial non-matches");
077 else
078 tabular.println("type\tmatches\tnon-matches");
079
080 Set<String> classes = iaa.getAnnotationClasses();
081 Set<String> sets = iaa.getSetNames();
082
083 Map<String, Set<Annotation>> allwayMatches = iaa.getAllwayMatches();
084 Map<String, Set<Annotation>> nontrivialAllwayMatches = iaa.getNontrivialAllwayMatches();
085 Map<String, Set<Annotation>> trivialAllwayMatches = iaa.getTrivialAllwayMatches();
086 Map<String, Set<Annotation>> allwayNonmatches = iaa.getAllwayNonmatches();
087 Map<String, Set<Annotation>> nontrivialAllwayNonmatches = iaa.getNontrivialAllwayNonmatches();
088 Map<String, Set<Annotation>> trivialAllwayNonmatches = iaa.getTrivialAllwayNonmatches();
089
090 Set<Annotation> allwayMatchesSingleSet = getSingleSet(allwayMatches);
091 Set<Annotation> trivialAllwayMatchesSingleSet = getSingleSet(trivialAllwayMatches);
092 Set<Annotation> nontrivialAllwayMatchesSingleSet = getSingleSet(nontrivialAllwayMatches);
093 Set<Annotation> allwayNonmatchesSingleSet = getSingleSet(allwayNonmatches);
094 Set<Annotation> trivialAllwayNonmatchesSingleSet = getSingleSet(trivialAllwayNonmatches);
095 Set<Annotation> nontrivialAllwayNonmatchesSingleSet = getSingleSet(nontrivialAllwayNonmatches);
096
097 AnnotationSpanIndex spanIndex = new AnnotationSpanIndex(allwayNonmatchesSingleSet);
098
099 int totalAllwayMatches = allwayMatchesSingleSet.size();
100 int totalTrivialAllwayMatches = trivialAllwayMatchesSingleSet.size();
101 int totalNontrivialAllwayMatches = nontrivialAllwayMatchesSingleSet.size();
102 int totalAllwayNonmatches = allwayNonmatchesSingleSet.size();
103 int totalTrivialAllwayNonmatches = trivialAllwayNonmatchesSingleSet.size();
104 int totalNontrivialAllwayNonmatches = nontrivialAllwayNonmatchesSingleSet.size();
105
106 double iaaScore = (double) totalAllwayMatches / ((double) totalAllwayMatches + (double) totalAllwayNonmatches);
107 double stingyIAAScore = (double) totalNontrivialAllwayMatches
108 / ((double) totalAllwayMatches + (double) totalAllwayNonmatches);
109 double respectableIAAScore = (double) totalNontrivialAllwayMatches
110 / ((double) totalNontrivialAllwayMatches + (double) totalAllwayNonmatches);
111 double nontrivialIAAScore = (double) totalNontrivialAllwayMatches
112 / ((double) totalNontrivialAllwayMatches + (double) totalNontrivialAllwayNonmatches);
113
114 if (matcher.returnsTrivials()) {
115 html.println("<tr><td><b>All classes</b></td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
116 + "<td>" + percentageFormat.format(stingyIAAScore) + "</td>" + "<td>"
117 + percentageFormat.format(respectableIAAScore) + "</td>" + "<td>"
118 + percentageFormat.format(nontrivialIAAScore) + "</td>" + "<td>" + totalAllwayMatches + "</td>"
119 + "<td>" + totalTrivialAllwayMatches + "</td>" + "<td>" + totalNontrivialAllwayMatches + "</td>"
120 + "<td>" + totalAllwayNonmatches + "</td>" + "<td>" + totalTrivialAllwayNonmatches + "</td>"
121 + "<td>" + totalNontrivialAllwayNonmatches + "</td></tr>");
122 tabular.println("All classes\t" + totalAllwayMatches + "\t" + totalTrivialAllwayMatches + "\t"
123 + totalNontrivialAllwayMatches + "\t" + totalAllwayNonmatches + "\t" + totalTrivialAllwayNonmatches
124 + "\t" + totalNontrivialAllwayNonmatches);
125 } else {
126 html.println("<tr><td><b>All classes</b></td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
127 + "<td>" + totalAllwayMatches + "</td>" + "<td>" + totalAllwayNonmatches + "</td></tr>");
128 tabular.println("All classes\t" + totalAllwayMatches + "\t" + totalAllwayNonmatches);
129 }
130
131 Map<String, Set<Annotation>> sortedAllwayMatches = sortByType(classes, allwayMatchesSingleSet);
132 Map<String, Set<Annotation>> sortedAllwayTrivialMatches = sortByType(classes, trivialAllwayMatchesSingleSet);
133 Map<String, Set<Annotation>> sortedAllwayNontrivialMatches = sortByType(classes,
134 nontrivialAllwayMatchesSingleSet);
135 Map<String, Set<Annotation>> sortedAllwayNonmatches = sortByType(classes, allwayNonmatchesSingleSet);
136 Map<String, Set<Annotation>> sortedAllwayTrivialNonmatches = sortByType(classes,
137 trivialAllwayNonmatchesSingleSet);
138 Map<String, Set<Annotation>> sortedAllwayNontrivialNonmatches = sortByType(classes,
139 nontrivialAllwayNonmatchesSingleSet);
140
141 java.util.List<String> sortedTypes = new ArrayList<String>(classes);
142 Collections.sort(sortedTypes);
143
144 for (String type : sortedTypes) {
145 int classMatches = sortedAllwayMatches.get(type).size();
146 int classTrivialMatches = sortedAllwayTrivialMatches.get(type).size();
147 int classNontrivialMatches = sortedAllwayNontrivialMatches.get(type).size();
148 int classNonmatches = sortedAllwayNonmatches.get(type).size();
149 int classTrivialNonmatches = sortedAllwayTrivialNonmatches.get(type).size();
150 int classNontrivialNonmatches = sortedAllwayNontrivialNonmatches.get(type).size();
151
152 iaaScore = (double) classMatches / ((double) classMatches + (double) classNonmatches);
153 stingyIAAScore = (double) classNontrivialMatches / ((double) classMatches + (double) classNonmatches);
154 respectableIAAScore = (double) classNontrivialMatches
155 / ((double) classNontrivialMatches + (double) classNonmatches);
156 nontrivialIAAScore = (double) classNontrivialMatches
157 / ((double) classNontrivialMatches + (double) classNontrivialNonmatches);
158
159 if (matcher.returnsTrivials()) {
160 html.println("<tr><td>" + type + "</td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
161 + "<td>" + percentageFormat.format(stingyIAAScore) + "</td>" + "<td>"
162 + percentageFormat.format(respectableIAAScore) + "</td>" + "<td>"
163 + percentageFormat.format(nontrivialIAAScore) + "</td>" + "<td>" + classMatches + "</td>"
164 + "<td>" + classTrivialMatches + "</td>" + "<td>" + classNontrivialMatches + "</td>" + "<td>"
165 + classNonmatches + "</td>" + "<td>" + classTrivialNonmatches + "</td>" + "<td>"
166 + classNontrivialNonmatches + "</td></tr>");
167 tabular.println(type + "\t" + classMatches + "\t" + classTrivialMatches + "\t" + classNontrivialMatches
168 + "\t" + classNonmatches + "\t" + classTrivialNonmatches + "\t" + classNontrivialNonmatches);
169 } else {
170 html.println("<tr><td>" + type + "</td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
171 + "<td>" + classMatches + "</td>" + "<td>" + classNonmatches + "</td></tr>");
172 tabular.println(type + "\t" + classMatches + "\t" + classNonmatches);
173 }
174 }
175 html.println("</table>");
176
177 printMatchData(html, sets, fileName, directory, allwayMatches, classes, spanIndex, sortedTypes,
178 annotationTexts, annotationTextNames, matcher, trivialAllwayMatches, nontrivialAllwayMatches, iaa);
179
180 printNonmatchData(html, sets, fileName, directory, allwayNonmatches, classes, spanIndex, sortedTypes,
181 annotationTexts, annotationTextNames, matcher, trivialAllwayNonmatches, nontrivialAllwayNonmatches);
182
183 Map<String, Map<String, Set<Annotation>>> pairwiseMatches = iaa.getPairwiseMatches();
184 Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches = iaa.getPairwiseNonmatches();
185
186 printPairwiseAgreement(html, sets, pairwiseMatches, pairwiseNonmatches, percentageFormat);
187
188 html.flush();
189 html.close();
190 tabular.flush();
191 tabular.close();
192 }
193
194 private static void printMatchData(PrintStream html, Set<String> sets, String fileName, File directory,
195 Map<String, Set<Annotation>> allwayMatches, Set<String> classes, AnnotationSpanIndex spanIndex,
196 List<String> sortedTypes, Map<Annotation, String> annotationTexts,
197 Map<Annotation, String> annotationTextNames, Matcher matcher,
198 Map<String, Set<Annotation>> trivialAllwayMatches, Map<String, Set<Annotation>> nontrivialAllwayMatches,
199 IAA iaa) throws Exception {
200 html.println("<h2>match data</h2>");
201 html.println("<ul>");
202
203 Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets();
204
205 for (String set : sets) {
206 String matchesFileName = fileName + ".matches." + set + ".html";
207 html.println("<li><a href=\"" + matchesFileName + "\">matches for " + set + "</a></li>");
208 PrintStream matchesStream = new PrintStream(new File(directory, matchesFileName));
209 Set<Annotation> matches = allwayMatches.get(set);
210 Map<String, Set<Annotation>> sortedMatches = sortByType(classes, matches);
211
212 initHTML(
213 matchesStream,
214 "Matches for " + set,
215 fileName + ".html",
216 fileName,
217 "Each annotation that was considered a match is shown in the text that it was found in. The matching annotations from the other annotation sets are also shown.");
218 printInstances(matchesStream, sortedMatches, sortedTypes, annotationTexts, annotationTextNames, matchSets);
219 matchesStream.flush();
220 matchesStream.close();
221
222 if (matcher.returnsTrivials()) {
223 String trivialMatchesFileName = fileName + ".trivial.matches." + set + ".html";
224 html.println("<li><a href=\"" + trivialMatchesFileName + "\">trivial matches for " + set + "</a></li>");
225 PrintStream trivialMatchesStream = new PrintStream(new File(directory, trivialMatchesFileName));
226 Set<Annotation> trivialMatches = trivialAllwayMatches.get(set);
227 Map<String, Set<Annotation>> sortedTrivialMatches = sortByType(classes, trivialMatches);
228 initHTML(
229 trivialMatchesStream,
230 "Trivial matches for " + set,
231 fileName + ".html",
232 fileName,
233 "Each annotation that was considered a trival match is shown in the text that it was found in. The matching annotations from the other annotation sets are also shown.");
234 printInstances(trivialMatchesStream, sortedTrivialMatches, sortedTypes, annotationTexts,
235 annotationTextNames, matchSets);
236 trivialMatchesStream.flush();
237 trivialMatchesStream.close();
238
239 String nontrivialMatchesFileName = fileName + ".nontrivial.matches." + set + ".html";
240 html.println("<li><a href=\"" + nontrivialMatchesFileName + "\">non-trivial matches for " + set
241 + "</a></li>");
242 PrintStream nontrivialMatchesStream = new PrintStream(new File(directory, nontrivialMatchesFileName));
243 Set<Annotation> nontrivialMatches = nontrivialAllwayMatches.get(set);
244 Map<String, Set<Annotation>> sortedNontrivialMatches = sortByType(classes, nontrivialMatches);
245 initHTML(
246 nontrivialMatchesStream,
247 "non-trivial non-matches for " + set,
248 fileName + ".html",
249 fileName,
250 "Each annotation that was considered a non-trival match is shown in the text that it was found in. The matching from the other annotation sets are also shown.");
251 printInstances(nontrivialMatchesStream, sortedNontrivialMatches, sortedTypes, annotationTexts,
252 annotationTextNames, matchSets);
253 nontrivialMatchesStream.flush();
254 nontrivialMatchesStream.close();
255 }
256 }
257 html.println("</ul><hr>");
258
259 }
260
261 private static void printNonmatchData(PrintStream html, Set<String> sets, String fileName, File directory,
262 Map<String, Set<Annotation>> allwayNonmatches, Set<String> classes, AnnotationSpanIndex spanIndex,
263 List<String> sortedTypes, Map<Annotation, String> annotationTexts,
264 Map<Annotation, String> annotationTextNames, Matcher matcher,
265 Map<String, Set<Annotation>> trivialAllwayNonmatches,
266 Map<String, Set<Annotation>> nontrivialAllwayNonmatches) throws Exception {
267 html.println("<h2>non-match data</h2>");
268 html.println("<ul>");
269
270 for (String set : sets) {
271 String errorsFileName = fileName + ".nonmatches." + set + ".html";
272 html.println("<li><a href=\"" + errorsFileName + "\">non-matches for " + set + "</a></li>");
273 PrintStream errors = new PrintStream(new File(directory, errorsFileName));
274 Set<Annotation> nonmatches = allwayNonmatches.get(set);
275 Map<String, Set<Annotation>> sortedNonmatches = sortByType(classes, nonmatches);
276
277 Map<Annotation, Set<Annotation>> comparisonAnnotations = new HashMap<Annotation, Set<Annotation>>();
278 for (Annotation nonmatch : nonmatches) {
279 comparisonAnnotations.put(nonmatch, getCandidateAnnotations(nonmatch, spanIndex));
280 }
281
282 initHTML(
283 errors,
284 "Non-matches for " + set,
285 fileName + ".html",
286 fileName,
287 "Each annotation that was considered a non-match is shown in the text that it was found in. Overlapping annotations from the other annotation sets are also shown.");
288 printInstances(errors, sortedNonmatches, sortedTypes, annotationTexts, annotationTextNames,
289 comparisonAnnotations);
290 errors.flush();
291 errors.close();
292
293 if (matcher.returnsTrivials()) {
294 String trivialNonMatchesFileName = fileName + ".trivial.nonmatches." + set + ".html";
295 html.println("<li><a href=\"" + trivialNonMatchesFileName + "\">trivial non-matches for " + set
296 + "</a></li>");
297 PrintStream trivialErrors = new PrintStream(new File(directory, trivialNonMatchesFileName));
298 Set<Annotation> trivialNonmatches = trivialAllwayNonmatches.get(set);
299 Map<String, Set<Annotation>> sortedTrivialNonmatches = sortByType(classes, trivialNonmatches);
300 initHTML(
301 trivialErrors,
302 "Trivial non-matches for " + set,
303 fileName + ".html",
304 fileName,
305 "Each annotation that was considered a trival non-match is shown in the text that it was found in. Overlapping annotations from the other annotation sets are also shown.");
306 printInstances(trivialErrors, sortedTrivialNonmatches, sortedTypes, annotationTexts,
307 annotationTextNames, comparisonAnnotations);
308 trivialErrors.flush();
309 trivialErrors.close();
310
311 String nontrivialNonMatchesFileName = fileName + ".nontrivial.nonmatches." + set + ".html";
312 html.println("<li><a href=\"" + nontrivialNonMatchesFileName + "\">non-trivial non-matches for " + set
313 + "</a></li>");
314 PrintStream nontrivialErrors = new PrintStream(new File(directory, nontrivialNonMatchesFileName));
315 Set<Annotation> nontrivialNonmatches = nontrivialAllwayNonmatches.get(set);
316 Map<String, Set<Annotation>> sortedNontrivialNonmatches = sortByType(classes, nontrivialNonmatches);
317 initHTML(
318 nontrivialErrors,
319 "non-trivial non-matches for " + set,
320 fileName + ".html",
321 fileName,
322 "Each annotation that was considered a non-trival non-match is shown in the text that it was found in. Overlapping annotations from the other annotation sets are also shown.");
323 printInstances(nontrivialErrors, sortedNontrivialNonmatches, sortedTypes, annotationTexts,
324 annotationTextNames, comparisonAnnotations);
325 nontrivialErrors.flush();
326 nontrivialErrors.close();
327 }
328 }
329 html.println("</ul><hr>");
330 }
331
332 public static Set<Annotation> getCandidateAnnotations(Annotation annotation, AnnotationSpanIndex spanIndex) {
333 Set<Annotation> candidateAnnotations = new HashSet<Annotation>();
334 String set = annotation.getSetName();
335 String docID = annotation.getDocID();
336
337 Set<Annotation> overlappingAnnotations = spanIndex.getOverlappingAnnotations(annotation);
338 for (Annotation overlappingAnnotation : overlappingAnnotations) {
339 String candidateAnnotationSet = overlappingAnnotation.getSetName();
340 if (!candidateAnnotationSet.equals(set)) {
341 String candidateDocID = overlappingAnnotation.getDocID();
342 if (candidateDocID.equals(docID)) {
343 candidateAnnotations.add(overlappingAnnotation);
344 }
345 }
346 }
347 return candidateAnnotations;
348 }
349
350 public static void printInstances(PrintStream out, Map<String, Set<Annotation>> sortedAnnotations,
351 java.util.List<String> sortedTypes, Map<Annotation, String> annotationTexts,
352 Map<Annotation, String> annotationTextNames, Map<Annotation, Set<Annotation>> comparisonAnnotations) {
353 for (String type : sortedTypes) {
354 out.println("<h2>" + type + "</h2>");
355 Set<Annotation> typeAnnotations = sortedAnnotations.get(type);
356 for (Annotation annotation : typeAnnotations) {
357 writeAnnotationTextSourceHTML(out, annotation, annotationTexts.get(annotation), annotationTextNames
358 .get(annotation));
359 out.println("<ul><li>");
360 printAnnotationHTML(out, annotation, annotationTexts.get(annotation));
361
362 Set<Annotation> comparisons = comparisonAnnotations.get(annotation);
363 if (comparisons != null) {
364 for (Annotation comparisonAnnotation : comparisons) {
365 if (!comparisonAnnotation.equals(annotation)) {
366 out.println("<li>");
367 printAnnotationHTML(out, comparisonAnnotation, annotationTexts.get(comparisonAnnotation));
368 }
369 }
370 }
371 out.println("</ul>");
372 }
373 }
374 }
375
376 public static Set<Annotation> getSingleSet(Map<String, Set<Annotation>> annotations) {
377 Set<Annotation> returnValues = new HashSet<Annotation>();
378 for (String setName : annotations.keySet()) {
379 returnValues.addAll(annotations.get(setName));
380 }
381 return returnValues;
382 }
383
384 public static void initHTML(PrintStream html, String title, String link, String linkLabel, String description) {
385 html.println("<html>");
386 html.println("<head><title>" + title + "</title></head>");
387 html.println("<body>");
388 if (link != null)
389 html.println("<a href=\"" + link + "\">" + linkLabel + "</a>");
390 html.println("<h1>" + title + "</h1>");
391 html.println(description);
392 html.println("<hr>");
393 }
394
395 public static Map<String, Set<Annotation>> sortByType(Set<String> types, Collection<Annotation> annotations) {
396 Map<String, Set<Annotation>> sortedAnnotations = new HashMap<String, Set<Annotation>>();
397
398 for (String type : types) {
399 sortedAnnotations.put(type, new HashSet<Annotation>());
400 }
401 for (Annotation annotation : annotations) {
402 String type = annotation.getAnnotationClass();
403 if (type != null)
404 sortedAnnotations.get(type).add(annotation);
405 }
406 return sortedAnnotations;
407 }
408
409 static void writeAnnotationTextSourceHTML(PrintStream out, Annotation annotation, String annotationText,
410 String annotationTextName) {
411 StringBuffer html = new StringBuffer("<hr><p>");
412 if (annotationTextName != null)
413 html.append("Text source name = " + annotationTextName + "<p>");
414
415 if (annotationText != null) {
416 java.util.List<Span> spans = annotation.getSpans();
417 java.util.List<Span> modifiedSpans = new ArrayList<Span>(spans);
418
419 if (spans == null || spans.size() == 0) {
420 // spans should be the recursive spans
421 }
422 annotationText = shortenText(annotationText, modifiedSpans);
423
424 int mark = 0;
425
426 for (Span span : modifiedSpans) {
427 try {
428 html.append(annotationText.substring(mark, span.getStart()) + "<b>");
429 html.append(Span.substring(annotationText, span) + "</b>");
430 mark = span.getEnd();
431 } catch (StringIndexOutOfBoundsException sioobe) {
432 sioobe.printStackTrace();
433 System.out.println("annotationText=" + annotationText);
434 System.out.println("annotation = " + annotation.getSpans().get(0));
435 System.out.println("annotationTextName = " + annotationTextName);
436
437 }
438
439 }
440 if (mark < annotationText.length())
441 html.append(annotationText.substring(mark));
442 }
443 out.println(html.toString());
444 }
445
446 private static String shortenText(String text, java.util.List<Span> spans) {
447 int frontBuffer = 150;
448 int endBuffer = 150;
449 if (spans.size() > 0) {
450 Span span = spans.get(0);
451 int start = Math.max(0, span.getStart() - frontBuffer);
452 int end = Math.min(text.length(), span.getEnd() + endBuffer);
453 String substring = text.substring(start, end);
454
455 for (int i = 0; i < spans.size(); i++) {
456 span = spans.get(i);
457 Span offsetSpan = new Span(span.getStart() - start, span.getEnd() - start);
458 spans.set(i, offsetSpan);
459 }
460 return substring;
461 }
462 return text;
463 }
464
465 static void printAnnotationHTML(PrintStream out, Annotation annotation, String annotationText) {
466 StringBuffer html = new StringBuffer();
467
468 if (annotationText != null) {
469 String coveredText = Annotation.getCoveredText(annotation, annotationText, " ... ");
470 html.append(coveredText);
471 }
472 html.append(" " + annotation.toHTML());
473 out.print(html.toString());
474 }
475
476 static void printIntro(PrintStream html, IAA iaa, int numberOfDocs, String fileName, Matcher matcher) {
477 html
478 .println("<p>For more detailed documentation on IAA please see the <a href=\"http://knowtator.sourceforge.net//iaa.shtml\">"
479 + "IAA documentation</a>.");
480
481 html.println("<p>");
482 html.println("<h2>" + iaa.getSetNames().size() + "-way IAA Results</h2>");
483 html.println("IAA calculated on " + numberOfDocs + " documents.");
484 html.println("<p><a href=\"" + fileName + ".dat\">tabular data</a>");
485 html.println("<p>all annotations = matches + non-matches");
486 html.println("<br> IAA = matches / all annotations");
487 if (matcher.returnsTrivials()) {
488 html.println("<br>stingy IAA = non-trivial matches / (matches + non-matches)");
489 html.println("<br>respectable IAA = non-trivial matches / (non-trivial matches + non-matches)");
490 html.println("<br>non-trivial IAA = non-trivial matches / (non-trivial matches + non-trivial non-matches)");
491 }
492
493 }
494
495 static void printTitleRowForAllwayIAA(PrintStream html, Matcher matcher) {
496 html.println("<table border=1><tr><td><b>Type</b></td>" + "<td><b>IAA</b></td>");
497 if (matcher.returnsTrivials()) {
498 html.println("<td><b>stingy IAA</b></td>" + "<td><b>respectable IAA</b></td>"
499 + "<td><b>non-trivial IAA</b></td>");
500 }
501 html.println("<td><b>matches</b></td>");
502 if (matcher.returnsTrivials()) {
503 html.println("<td><b>trivial matches</b></td>" + "<td><b>non-trivial matches</b></td>");
504 }
505 html.println("<td><b>non-matches</b></td>");
506 if (matcher.returnsTrivials()) {
507 html.println("<td><b>trivial non-matches</b></td>" + "<td><b>non-trivial non-matches</b></td>");
508 }
509 html.println("</tr>");
510 }
511
512 public static String initHTML(String title, String description) {
513 StringBuffer html = new StringBuffer();
514 html.append("<html>\n");
515 html.append("<head><title>" + title + "</title></head>\n");
516 html.append("<body>\n");
517 html.append("<h1>" + title + "</h1>\n");
518 html.append(description);
519 html
520 .append(" For more detailed documentation on IAA please see the <a href=\"http://knowtator.sourceforge.net//iaa.shtml\">IAA documentation</a>.\n");
521 return html.toString();
522 }
523
524 public static void printMatchData(PrintStream html, Set<String> sets, String fileName, File directory,
525 Map<String, Set<Annotation>> allwayMatches, Map<Annotation, String> annotationTexts,
526 Map<Annotation, String> annotationTextNames, Set<String> classes, IAA iaa) throws IOException
527
528 {
529 html.println("<h2>match data</h2>");
530 html.println("<ul>");
531
532 Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets();
533 java.util.List<String> sortedTypes = new ArrayList<String>(classes);
534 Collections.sort(sortedTypes);
535
536 for (String set : sets) {
537 String matchesFileName = fileName + ".matches." + set + ".html";
538 html.println("<li><a href=\"" + matchesFileName + "\">matches for " + set + "</a></li>");
539 PrintStream matchesStream = new PrintStream(new File(directory, matchesFileName));
540 Set<Annotation> matches = allwayMatches.get(set);
541 Map<String, Set<Annotation>> sortedMatches = IAA2HTML.sortByType(classes, matches);
542
543 matchesStream
544 .println(initHTML(
545 "Matches for " + set,
546 "Each annotation that was considered a match is shown in the text that it was found in. The matching annotations from the other annotation sets are also shown."));
547 IAA2HTML.printInstances(matchesStream, sortedMatches, sortedTypes, annotationTexts, annotationTextNames,
548 matchSets);
549 matchesStream.flush();
550 matchesStream.close();
551 }
552 html.println("</ul><hr>");
553 }
554
555 public static void printNonmatchData(PrintStream html, Set<String> sets, String fileName, File directory,
556 Map<String, Set<Annotation>> allwayNonmatches, AnnotationSpanIndex spanIndex,
557 Map<Annotation, String> annotationTexts, Map<Annotation, String> annotationTextNames, Set<String> classes,
558 IAA iaa) throws IOException {
559 html.println("<h2>non-match data</h2>");
560 html.println("<ul>");
561
562 java.util.List<String> sortedTypes = new ArrayList<String>(classes);
563 Collections.sort(sortedTypes);
564
565 for (String set : sets) {
566 String errorsFileName = fileName + ".nonmatches." + set + ".html";
567 html.println("<li><a href=\"" + errorsFileName + "\">non-matches for " + set + "</a></li>");
568 PrintStream errors = new PrintStream(new File(directory, errorsFileName));
569 Set<Annotation> nonmatches = allwayNonmatches.get(set);
570 Map<String, Set<Annotation>> sortedNonmatches = IAA2HTML.sortByType(classes, nonmatches);
571
572 Map<Annotation, Set<Annotation>> comparisonAnnotations = new HashMap<Annotation, Set<Annotation>>();
573 for (Annotation nonmatch : nonmatches) {
574 comparisonAnnotations.put(nonmatch, IAA2HTML.getCandidateAnnotations(nonmatch, spanIndex));
575 }
576
577 errors
578 .println(initHTML(
579 "Non-matches for " + set,
580 "Each annotation that was considered a non-match is shown in the text that it was found in. Overlapping annotations from the other annotation sets are also shown."));
581 IAA2HTML.printInstances(errors, sortedNonmatches, sortedTypes, annotationTexts, annotationTextNames,
582 comparisonAnnotations);
583 errors.flush();
584 errors.close();
585
586 }
587 html.println("</ul><hr>");
588 }
589
590 public static void printPairwiseAgreement(PrintStream html, Set<String> sets,
591 Map<String, Map<String, Set<Annotation>>> pairwiseMatches,
592 Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches, NumberFormat percentageFormat) {
593 html.println("<h2>Pair-wise agreement</h2>");
594 html.println("<table border=1><tr><td><b>Gold standard set</b></td>" + "<td><b>compared set</b></td>"
595 + "<td><b>true positives</b></td>" + "<td><b>false positives</b></td>"
596 + "<td><b>false negatives</b></td>" + "<td><b>precision</b></td>" + "<td><b>recall</b></td>"
597 + "<td><b>F-score</b></td></tr>");
598
599 for (String setName : sets) {
600 for (String setName2 : sets) {
601
602 if (!setName.equals(setName2)) {
603 Set<Annotation> truePositives = pairwiseMatches.get(setName).get(setName2);
604 Set<Annotation> falseNegatives = pairwiseNonmatches.get(setName).get(setName2);
605 Set<Annotation> falsePositives = pairwiseNonmatches.get(setName2).get(setName);
606 double precision = (double) truePositives.size()
607 / ((double) truePositives.size() + (double) falsePositives.size());
608 double recall = (double) truePositives.size()
609 / ((double) truePositives.size() + (double) falseNegatives.size());
610 double f_score = ((double) 2 * precision * recall) / (recall + precision);
611
612 html.println("<tr><td>" + setName + "</td>" + "<td>" + setName2 + "</td>" + "<td>"
613 + truePositives.size() + "</td>" + "<td>" + falsePositives.size() + "</td>" + "<td>"
614 + falseNegatives.size() + "</td>" + "<td>" + percentageFormat.format(precision) + "</td>"
615 + "<td>" + percentageFormat.format(recall) + "</td>" + "<td>"
616 + percentageFormat.format(f_score) + "</td></tr>");
617 }
618 }
619 }
620 html.println("</table>");
621 html.println("Precision and recall are given equal weight for the F-score.");
622 }
623
624 }
625
626 // for(String type : sortedTypes)
627 // {
628 // errors.println("<h2>"+type+"</h2>");
629 // Set<Annotation> typeNonmatches = sortedNonmatches.get(type);
630 // for(Annotation annotation : typeNonmatches)
631 // {
632 // String docID = annotation.getDocID();
633 // writeAnnotationTextSourceHTML(errors, annotation,
634 // annotationTexts.get(annotation), annotationTextNames.get(annotation));
635 // errors.println("<ul><li>");
636 // printAnnotationHTML(errors, annotation, annotationTexts.get(annotation));
637 //
638 // Set<Annotation> candidateAnnotations =
639 // spanIndex.getOverlappingAnnotations(annotation);
640 // for(Annotation candidateAnnotation : candidateAnnotations)
641 // {
642 // String candidateAnnotationSet = candidateAnnotation.getSetName();
643 // if(!candidateAnnotationSet.equals(set))
644 // {
645 // String candidateDocID = candidateAnnotation.getDocID();
646 // if(candidateDocID.equals(docID))
647 // {
648 // errors.println("<li>");
649 // printAnnotationHTML(errors, candidateAnnotation,
650 // annotationTexts.get(candidateAnnotation));
651 // }
652 // }
653 // }
654 // errors.println("</ul>");
655 // }
656 // }
657
658 // for(String set : sets)
659 // {
660 // String matchesFileName = fileName+".matches."+set+".html";
661 //html.println("<li><a href=\""+matchesFileName+"\">matches for "+set+"</a></li>"
662 // );
663 // PrintStream matchesStream = new PrintStream(new File(directory,
664 // matchesFileName));
665 // Set<Annotation> matches = allwayMatches.get(set);
666 // Map<String, Set<Annotation>> sortedMatches = sortByType(classes, matches);
667 // Map<Annotation, Set<Annotation>> matchSets = new HashMap<Annotation,
668 // Set<Annotation>>();
669 // for(Annotation nonmatch : nonmatches)
670 // {
671 // comparisonAnnotations.put(nonmatch, getCandidateAnnotations(nonmatch,
672 // spanIndex));
673 // }
674 //
675 // initHTML(errors, "Non-matches for "+set, fileName+".html", fileName,
676 // "Each annotation that was considered a non-match is shown in the text that it was found in. Overlapping annotations from the other annotation sets are also shown."
677 // );
678 // printInstances(errors, sortedNonmatches, sortedTypes, annotationTexts,
679 // annotationTextNames, comparisonAnnotations);
680 // errors.flush(); errors.close();
681 // }
682
683 // String matchesFileName = fileName+".matches.html";
684 // String trivialMatchesFileName = fileName+".trivial.matches.html";
685 // String nontrivialMatchesFileName = fileName+".nontrivial.matches.html";
686 // PrintStream matchesStream = new PrintStream(new File(directory,
687 // matchesFileName));
688 // PrintStream trivialMatchesStream = new PrintStream(new File(directory,
689 // trivialMatchesFileName));
690 // PrintStream nontrivialMatchesStream = new PrintStream(new File(directory,
691 // nontrivialMatchesFileName));
692 //
693 // html.println("<h2>match data</h2>");
694 // html.println("<ul><li><a href=\""+matchesFileName+"\">Matches</a></li>");
695 // if(matcher.returnsTrivials())
696 // { html.println("<li><a href=\""+trivialMatchesFileName+
697 // "\">Trivial matches</a></li>");
698 // html.println("<li><a href=\""+nontrivialMatchesFileName+
699 // "\">Non-trivial matches</a></li>");
700 // }
701 // html.println("</ul>");
702 //
703 // Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets();
704 // initHTML(matchesStream, "Matches", fileName+".html", fileName,
705 // "Each annotation that was considered a match is shown in the text that it was found in. Annotations from each of the annotation sets are shown because there may be differences in the individual annotations if the match criteria ignored those differences. Only one of the annotation's spans is bolded in the text."
706 // );
707 // if(matcher.returnsTrivials())
708 // {
709 // initHTML(trivialMatchesStream, "Trivial matches", fileName+".html", fileName,
710 // "Each annotation that was considered a match is shown in the text that it was found in. Annotations from each of the annotation sets are shown because there may be differences in the individual annotations if the match criteria ignored those differences. Only one of the annotation's spans is bolded in the text."
711 // );
712 // initHTML(nontrivialMatchesStream, "Non-trivial matches", fileName+".html",
713 // fileName,
714 // "Each annotation that was considered a match is shown in the text that it was found in. Annotations from each of the annotation sets are shown because there may be differences in the individual annotations if the match criteria ignored those differences. Only one of the annotation's spans is bolded in the text."
715 // );
716 // }
717 //
718 // Set<Annotation> printedAnnotations = new HashSet<Annotation>();
719 // for(String type : sortedTypes)
720 // {
721 // matchesStream.println("<h2>"+type+"</h2>");
722 // trivialMatchesStream.println("<h2>"+type+"</h2>");
723 // nontrivialMatchesStream.println("<h2>"+type+"</h2>");
724 //
725 // Set<Annotation> typeTrivialMatches = sortedAllwayTrivialMatches.get(type);
726 // Set<Annotation> typeNontrivialMatches =
727 // sortedAllwayNontrivialMatches.get(type);
728 // Set<Annotation> typeMatches = sortedAllwayMatches.get(type);
729 //
730 // for(Annotation annotation : typeMatches)
731 // {
732 // if(printedAnnotations.contains(annotation)) continue;
733 // Set<Annotation> matchSet = matchSets.get(annotation);
734 //
735 // writeAnnotationTextSourceHTML(matchesStream, annotation,
736 // annotationTexts.get(annotation), annotationTextNames.get(annotation));
737 // matchesStream.println("<p>");
738 // printAnnotationHTML(matchesStream, annotation,
739 // annotationTexts.get(annotation));
740 // if(typeTrivialMatches.contains(annotation) && matcher.returnsTrivials())
741 // {
742 // writeAnnotationTextSourceHTML(trivialMatchesStream, annotation,
743 // annotationTexts.get(annotation), annotationTextNames.get(annotation));
744 // trivialMatchesStream.println("<p>");
745 // printAnnotationHTML(trivialMatchesStream, annotation,
746 // annotationTexts.get(annotation));
747 // }
748 // else if(typeNontrivialMatches.contains(annotation) &&
749 // matcher.returnsTrivials())
750 // {
751 // writeAnnotationTextSourceHTML(nontrivialMatchesStream, annotation,
752 // annotationTexts.get(annotation), annotationTextNames.get(annotation));
753 // nontrivialMatchesStream.println("<p>");
754 // printAnnotationHTML(nontrivialMatchesStream, annotation,
755 // annotationTexts.get(annotation));
756 // }
757 //
758 // printedAnnotations.add(annotation);
759 // for(Annotation matchedAnnotation : matchSet)
760 // {
761 // if(!matchedAnnotation.equals(annotation))
762 // {
763 // printAnnotationHTML(matchesStream, matchedAnnotation,
764 // annotationTexts.get(matchedAnnotation));
765 // if(typeTrivialMatches.contains(matchedAnnotation) &&
766 // matcher.returnsTrivials())
767 // {
768 // printAnnotationHTML(trivialMatchesStream, matchedAnnotation,
769 // annotationTexts.get(matchedAnnotation));
770 // }
771 // else if(typeNontrivialMatches.contains(matchedAnnotation) &&
772 // matcher.returnsTrivials())
773 // {
774 // printAnnotationHTML(nontrivialMatchesStream, matchedAnnotation,
775 // annotationTexts.get(matchedAnnotation));
776 // }
777 // printedAnnotations.add(matchedAnnotation);
778 // }
779 // }
780 // }
781 // }
782 // matchesStream.flush(); matchesStream.close();
783 // trivialMatchesStream.flush(); trivialMatchesStream.close();
784 // nontrivialMatchesStream.flush(); nontrivialMatchesStream.close();
785 //