001    /*
002     * The contents of this file are subject to the Mozilla Public
003     * License Version 1.1 (the "License"); you may not use this file
004     * except in compliance with the License. You may obtain a copy of
005     * the License at http://www.mozilla.org/MPL/
006     *
007     * Software distributed under the License is distributed on an "AS
008     * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009     * implied. See the License for the specific language governing
010     * rights and limitations under the License.
011     *
012     * The Original Code is Knowtator.
013     *
014     * The Initial Developer of the Original Code is University of Colorado.  
015     * Copyright (C) 2005 - 2008.  All Rights Reserved.
016     *
017     * Knowtator was developed by the Center for Computational Pharmacology
018     * (http://compbio.uchcs.edu) at the University of Colorado Health 
019     *  Sciences Center School of Medicine with support from the National 
020     *  Library of Medicine.  
021     *
022     * Current information about Knowtator can be obtained at 
023     * http://knowtator.sourceforge.net/
024     *
025     * Contributor(s):
026     *   Philip V. Ogren <philip@ogren.info> (Original Author)
027     */
028    
029    package edu.uchsc.ccp.iaa.html;
030    
031    import java.io.File;
032    import java.io.IOException;
033    import java.io.PrintStream;
034    import java.text.NumberFormat;
035    import java.util.ArrayList;
036    import java.util.Collection;
037    import java.util.Collections;
038    import java.util.HashMap;
039    import java.util.HashSet;
040    import java.util.List;
041    import java.util.Map;
042    import java.util.Set;
043    
044    import edu.uchsc.ccp.iaa.Annotation;
045    import edu.uchsc.ccp.iaa.AnnotationSpanIndex;
046    import edu.uchsc.ccp.iaa.IAA;
047    import edu.uchsc.ccp.iaa.Span;
048    import edu.uchsc.ccp.iaa.matcher.Matcher;
049    
050    public class IAA2HTML {
051    
052            public static void printIAA(IAA iaa, Matcher matcher, File directory, int numberOfDocs,
053                            Map<Annotation, String> annotationTexts, Map<Annotation, String> annotationTextNames) throws Exception {
054                    NumberFormat percentageFormat = NumberFormat.getPercentInstance();
055                    percentageFormat.setMinimumFractionDigits(2);
056    
057                    String fileName = matcher.getName();
058    
059                    PrintStream tabular = new PrintStream(new File(directory, fileName + ".dat"));
060                    PrintStream html = new PrintStream(new File(directory, fileName + ".html"));
061    
062                    initHTML(html, matcher.getName(), null, null, matcher.getDescription());
063    
064                    printIntro(html, iaa, numberOfDocs, fileName, matcher);
065                    html.println("<p>");
066                    printTitleRowForAllwayIAA(html, matcher);
067    
068                    tabular
069                                    .println("This file is provided to facilitate cut-n-paste into a spreadsheet.\n"
070                                                    + "If you cannot directly copy the data below into a spreadsheet without it all going into a single cell,\n"
071                                                    + "then try copying to a text editor first and then copy it from there.  There is typically a 'paste special'\n"
072                                                    + "option under the Edit menu that will allow you to paste the copied data as text.  This will also work.\n\n\n");
073    
074                    if (matcher.returnsTrivials())
075                            tabular
076                                            .println("type\tmatches\ttrivial matches\tnon-trivial matches\tnon-matches\ttrivial non-matches\tnon-trivial non-matches");
077                    else
078                            tabular.println("type\tmatches\tnon-matches");
079    
080                    Set<String> classes = iaa.getAnnotationClasses();
081                    Set<String> sets = iaa.getSetNames();
082    
083                    Map<String, Set<Annotation>> allwayMatches = iaa.getAllwayMatches();
084                    Map<String, Set<Annotation>> nontrivialAllwayMatches = iaa.getNontrivialAllwayMatches();
085                    Map<String, Set<Annotation>> trivialAllwayMatches = iaa.getTrivialAllwayMatches();
086                    Map<String, Set<Annotation>> allwayNonmatches = iaa.getAllwayNonmatches();
087                    Map<String, Set<Annotation>> nontrivialAllwayNonmatches = iaa.getNontrivialAllwayNonmatches();
088                    Map<String, Set<Annotation>> trivialAllwayNonmatches = iaa.getTrivialAllwayNonmatches();
089    
090                    Set<Annotation> allwayMatchesSingleSet = getSingleSet(allwayMatches);
091                    Set<Annotation> trivialAllwayMatchesSingleSet = getSingleSet(trivialAllwayMatches);
092                    Set<Annotation> nontrivialAllwayMatchesSingleSet = getSingleSet(nontrivialAllwayMatches);
093                    Set<Annotation> allwayNonmatchesSingleSet = getSingleSet(allwayNonmatches);
094                    Set<Annotation> trivialAllwayNonmatchesSingleSet = getSingleSet(trivialAllwayNonmatches);
095                    Set<Annotation> nontrivialAllwayNonmatchesSingleSet = getSingleSet(nontrivialAllwayNonmatches);
096    
097                    AnnotationSpanIndex spanIndex = new AnnotationSpanIndex(allwayNonmatchesSingleSet);
098    
099                    int totalAllwayMatches = allwayMatchesSingleSet.size();
100                    int totalTrivialAllwayMatches = trivialAllwayMatchesSingleSet.size();
101                    int totalNontrivialAllwayMatches = nontrivialAllwayMatchesSingleSet.size();
102                    int totalAllwayNonmatches = allwayNonmatchesSingleSet.size();
103                    int totalTrivialAllwayNonmatches = trivialAllwayNonmatchesSingleSet.size();
104                    int totalNontrivialAllwayNonmatches = nontrivialAllwayNonmatchesSingleSet.size();
105    
106                    double iaaScore = (double) totalAllwayMatches / ((double) totalAllwayMatches + (double) totalAllwayNonmatches);
107                    double stingyIAAScore = (double) totalNontrivialAllwayMatches
108                                    / ((double) totalAllwayMatches + (double) totalAllwayNonmatches);
109                    double respectableIAAScore = (double) totalNontrivialAllwayMatches
110                                    / ((double) totalNontrivialAllwayMatches + (double) totalAllwayNonmatches);
111                    double nontrivialIAAScore = (double) totalNontrivialAllwayMatches
112                                    / ((double) totalNontrivialAllwayMatches + (double) totalNontrivialAllwayNonmatches);
113    
114                    if (matcher.returnsTrivials()) {
115                            html.println("<tr><td><b>All classes</b></td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
116                                            + "<td>" + percentageFormat.format(stingyIAAScore) + "</td>" + "<td>"
117                                            + percentageFormat.format(respectableIAAScore) + "</td>" + "<td>"
118                                            + percentageFormat.format(nontrivialIAAScore) + "</td>" + "<td>" + totalAllwayMatches + "</td>"
119                                            + "<td>" + totalTrivialAllwayMatches + "</td>" + "<td>" + totalNontrivialAllwayMatches + "</td>"
120                                            + "<td>" + totalAllwayNonmatches + "</td>" + "<td>" + totalTrivialAllwayNonmatches + "</td>"
121                                            + "<td>" + totalNontrivialAllwayNonmatches + "</td></tr>");
122                            tabular.println("All classes\t" + totalAllwayMatches + "\t" + totalTrivialAllwayMatches + "\t"
123                                            + totalNontrivialAllwayMatches + "\t" + totalAllwayNonmatches + "\t" + totalTrivialAllwayNonmatches
124                                            + "\t" + totalNontrivialAllwayNonmatches);
125                    } else {
126                            html.println("<tr><td><b>All classes</b></td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
127                                            + "<td>" + totalAllwayMatches + "</td>" + "<td>" + totalAllwayNonmatches + "</td></tr>");
128                            tabular.println("All classes\t" + totalAllwayMatches + "\t" + totalAllwayNonmatches);
129                    }
130    
131                    Map<String, Set<Annotation>> sortedAllwayMatches = sortByType(classes, allwayMatchesSingleSet);
132                    Map<String, Set<Annotation>> sortedAllwayTrivialMatches = sortByType(classes, trivialAllwayMatchesSingleSet);
133                    Map<String, Set<Annotation>> sortedAllwayNontrivialMatches = sortByType(classes,
134                                    nontrivialAllwayMatchesSingleSet);
135                    Map<String, Set<Annotation>> sortedAllwayNonmatches = sortByType(classes, allwayNonmatchesSingleSet);
136                    Map<String, Set<Annotation>> sortedAllwayTrivialNonmatches = sortByType(classes,
137                                    trivialAllwayNonmatchesSingleSet);
138                    Map<String, Set<Annotation>> sortedAllwayNontrivialNonmatches = sortByType(classes,
139                                    nontrivialAllwayNonmatchesSingleSet);
140    
141                    java.util.List<String> sortedTypes = new ArrayList<String>(classes);
142                    Collections.sort(sortedTypes);
143    
144                    for (String type : sortedTypes) {
145                            int classMatches = sortedAllwayMatches.get(type).size();
146                            int classTrivialMatches = sortedAllwayTrivialMatches.get(type).size();
147                            int classNontrivialMatches = sortedAllwayNontrivialMatches.get(type).size();
148                            int classNonmatches = sortedAllwayNonmatches.get(type).size();
149                            int classTrivialNonmatches = sortedAllwayTrivialNonmatches.get(type).size();
150                            int classNontrivialNonmatches = sortedAllwayNontrivialNonmatches.get(type).size();
151    
152                            iaaScore = (double) classMatches / ((double) classMatches + (double) classNonmatches);
153                            stingyIAAScore = (double) classNontrivialMatches / ((double) classMatches + (double) classNonmatches);
154                            respectableIAAScore = (double) classNontrivialMatches
155                                            / ((double) classNontrivialMatches + (double) classNonmatches);
156                            nontrivialIAAScore = (double) classNontrivialMatches
157                                            / ((double) classNontrivialMatches + (double) classNontrivialNonmatches);
158    
159                            if (matcher.returnsTrivials()) {
160                                    html.println("<tr><td>" + type + "</td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
161                                                    + "<td>" + percentageFormat.format(stingyIAAScore) + "</td>" + "<td>"
162                                                    + percentageFormat.format(respectableIAAScore) + "</td>" + "<td>"
163                                                    + percentageFormat.format(nontrivialIAAScore) + "</td>" + "<td>" + classMatches + "</td>"
164                                                    + "<td>" + classTrivialMatches + "</td>" + "<td>" + classNontrivialMatches + "</td>" + "<td>"
165                                                    + classNonmatches + "</td>" + "<td>" + classTrivialNonmatches + "</td>" + "<td>"
166                                                    + classNontrivialNonmatches + "</td></tr>");
167                                    tabular.println(type + "\t" + classMatches + "\t" + classTrivialMatches + "\t" + classNontrivialMatches
168                                                    + "\t" + classNonmatches + "\t" + classTrivialNonmatches + "\t" + classNontrivialNonmatches);
169                            } else {
170                                    html.println("<tr><td>" + type + "</td>" + "<td>" + percentageFormat.format(iaaScore) + "</td>"
171                                                    + "<td>" + classMatches + "</td>" + "<td>" + classNonmatches + "</td></tr>");
172                                    tabular.println(type + "\t" + classMatches + "\t" + classNonmatches);
173                            }
174                    }
175                    html.println("</table>");
176    
177                    printMatchData(html, sets, fileName, directory, allwayMatches, classes, spanIndex, sortedTypes,
178                                    annotationTexts, annotationTextNames, matcher, trivialAllwayMatches, nontrivialAllwayMatches, iaa);
179    
180                    printNonmatchData(html, sets, fileName, directory, allwayNonmatches, classes, spanIndex, sortedTypes,
181                                    annotationTexts, annotationTextNames, matcher, trivialAllwayNonmatches, nontrivialAllwayNonmatches);
182    
183                    Map<String, Map<String, Set<Annotation>>> pairwiseMatches = iaa.getPairwiseMatches();
184                    Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches = iaa.getPairwiseNonmatches();
185    
186                    printPairwiseAgreement(html, sets, pairwiseMatches, pairwiseNonmatches, percentageFormat);
187    
188                    html.flush();
189                    html.close();
190                    tabular.flush();
191                    tabular.close();
192            }
193    
194            private static void printMatchData(PrintStream html, Set<String> sets, String fileName, File directory,
195                            Map<String, Set<Annotation>> allwayMatches, Set<String> classes, AnnotationSpanIndex spanIndex,
196                            List<String> sortedTypes, Map<Annotation, String> annotationTexts,
197                            Map<Annotation, String> annotationTextNames, Matcher matcher,
198                            Map<String, Set<Annotation>> trivialAllwayMatches, Map<String, Set<Annotation>> nontrivialAllwayMatches,
199                            IAA iaa) throws Exception {
200                    html.println("<h2>match data</h2>");
201                    html.println("<ul>");
202    
203                    Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets();
204    
205                    for (String set : sets) {
206                            String matchesFileName = fileName + ".matches." + set + ".html";
207                            html.println("<li><a href=\"" + matchesFileName + "\">matches for " + set + "</a></li>");
208                            PrintStream matchesStream = new PrintStream(new File(directory, matchesFileName));
209                            Set<Annotation> matches = allwayMatches.get(set);
210                            Map<String, Set<Annotation>> sortedMatches = sortByType(classes, matches);
211    
212                            initHTML(
213                                            matchesStream,
214                                            "Matches for " + set,
215                                            fileName + ".html",
216                                            fileName,
217                                            "Each annotation that was considered a match is shown in the text that it was found in.  The matching annotations from the other annotation sets are also shown.");
218                            printInstances(matchesStream, sortedMatches, sortedTypes, annotationTexts, annotationTextNames, matchSets);
219                            matchesStream.flush();
220                            matchesStream.close();
221    
222                            if (matcher.returnsTrivials()) {
223                                    String trivialMatchesFileName = fileName + ".trivial.matches." + set + ".html";
224                                    html.println("<li><a href=\"" + trivialMatchesFileName + "\">trivial matches for " + set + "</a></li>");
225                                    PrintStream trivialMatchesStream = new PrintStream(new File(directory, trivialMatchesFileName));
226                                    Set<Annotation> trivialMatches = trivialAllwayMatches.get(set);
227                                    Map<String, Set<Annotation>> sortedTrivialMatches = sortByType(classes, trivialMatches);
228                                    initHTML(
229                                                    trivialMatchesStream,
230                                                    "Trivial matches for " + set,
231                                                    fileName + ".html",
232                                                    fileName,
233                                                    "Each annotation that was considered a trival match is shown in the text that it was found in.  The matching annotations from the other annotation sets are also shown.");
234                                    printInstances(trivialMatchesStream, sortedTrivialMatches, sortedTypes, annotationTexts,
235                                                    annotationTextNames, matchSets);
236                                    trivialMatchesStream.flush();
237                                    trivialMatchesStream.close();
238    
239                                    String nontrivialMatchesFileName = fileName + ".nontrivial.matches." + set + ".html";
240                                    html.println("<li><a href=\"" + nontrivialMatchesFileName + "\">non-trivial matches for " + set
241                                                    + "</a></li>");
242                                    PrintStream nontrivialMatchesStream = new PrintStream(new File(directory, nontrivialMatchesFileName));
243                                    Set<Annotation> nontrivialMatches = nontrivialAllwayMatches.get(set);
244                                    Map<String, Set<Annotation>> sortedNontrivialMatches = sortByType(classes, nontrivialMatches);
245                                    initHTML(
246                                                    nontrivialMatchesStream,
247                                                    "non-trivial non-matches for " + set,
248                                                    fileName + ".html",
249                                                    fileName,
250                                                    "Each annotation that was considered a non-trival match is shown in the text that it was found in.  The matching from the other annotation sets are also shown.");
251                                    printInstances(nontrivialMatchesStream, sortedNontrivialMatches, sortedTypes, annotationTexts,
252                                                    annotationTextNames, matchSets);
253                                    nontrivialMatchesStream.flush();
254                                    nontrivialMatchesStream.close();
255                            }
256                    }
257                    html.println("</ul><hr>");
258    
259            }
260    
261            private static void printNonmatchData(PrintStream html, Set<String> sets, String fileName, File directory,
262                            Map<String, Set<Annotation>> allwayNonmatches, Set<String> classes, AnnotationSpanIndex spanIndex,
263                            List<String> sortedTypes, Map<Annotation, String> annotationTexts,
264                            Map<Annotation, String> annotationTextNames, Matcher matcher,
265                            Map<String, Set<Annotation>> trivialAllwayNonmatches,
266                            Map<String, Set<Annotation>> nontrivialAllwayNonmatches) throws Exception {
267                    html.println("<h2>non-match data</h2>");
268                    html.println("<ul>");
269    
270                    for (String set : sets) {
271                            String errorsFileName = fileName + ".nonmatches." + set + ".html";
272                            html.println("<li><a href=\"" + errorsFileName + "\">non-matches for " + set + "</a></li>");
273                            PrintStream errors = new PrintStream(new File(directory, errorsFileName));
274                            Set<Annotation> nonmatches = allwayNonmatches.get(set);
275                            Map<String, Set<Annotation>> sortedNonmatches = sortByType(classes, nonmatches);
276    
277                            Map<Annotation, Set<Annotation>> comparisonAnnotations = new HashMap<Annotation, Set<Annotation>>();
278                            for (Annotation nonmatch : nonmatches) {
279                                    comparisonAnnotations.put(nonmatch, getCandidateAnnotations(nonmatch, spanIndex));
280                            }
281    
282                            initHTML(
283                                            errors,
284                                            "Non-matches for " + set,
285                                            fileName + ".html",
286                                            fileName,
287                                            "Each annotation that was considered a non-match is shown in the text that it was found in.  Overlapping annotations from the other annotation sets are also shown.");
288                            printInstances(errors, sortedNonmatches, sortedTypes, annotationTexts, annotationTextNames,
289                                            comparisonAnnotations);
290                            errors.flush();
291                            errors.close();
292    
293                            if (matcher.returnsTrivials()) {
294                                    String trivialNonMatchesFileName = fileName + ".trivial.nonmatches." + set + ".html";
295                                    html.println("<li><a href=\"" + trivialNonMatchesFileName + "\">trivial non-matches for " + set
296                                                    + "</a></li>");
297                                    PrintStream trivialErrors = new PrintStream(new File(directory, trivialNonMatchesFileName));
298                                    Set<Annotation> trivialNonmatches = trivialAllwayNonmatches.get(set);
299                                    Map<String, Set<Annotation>> sortedTrivialNonmatches = sortByType(classes, trivialNonmatches);
300                                    initHTML(
301                                                    trivialErrors,
302                                                    "Trivial non-matches for " + set,
303                                                    fileName + ".html",
304                                                    fileName,
305                                                    "Each annotation that was considered a trival non-match is shown in the text that it was found in.  Overlapping annotations from the other annotation sets are also shown.");
306                                    printInstances(trivialErrors, sortedTrivialNonmatches, sortedTypes, annotationTexts,
307                                                    annotationTextNames, comparisonAnnotations);
308                                    trivialErrors.flush();
309                                    trivialErrors.close();
310    
311                                    String nontrivialNonMatchesFileName = fileName + ".nontrivial.nonmatches." + set + ".html";
312                                    html.println("<li><a href=\"" + nontrivialNonMatchesFileName + "\">non-trivial non-matches for " + set
313                                                    + "</a></li>");
314                                    PrintStream nontrivialErrors = new PrintStream(new File(directory, nontrivialNonMatchesFileName));
315                                    Set<Annotation> nontrivialNonmatches = nontrivialAllwayNonmatches.get(set);
316                                    Map<String, Set<Annotation>> sortedNontrivialNonmatches = sortByType(classes, nontrivialNonmatches);
317                                    initHTML(
318                                                    nontrivialErrors,
319                                                    "non-trivial non-matches for " + set,
320                                                    fileName + ".html",
321                                                    fileName,
322                                                    "Each annotation that was considered a non-trival non-match is shown in the text that it was found in.  Overlapping annotations from the other annotation sets are also shown.");
323                                    printInstances(nontrivialErrors, sortedNontrivialNonmatches, sortedTypes, annotationTexts,
324                                                    annotationTextNames, comparisonAnnotations);
325                                    nontrivialErrors.flush();
326                                    nontrivialErrors.close();
327                            }
328                    }
329                    html.println("</ul><hr>");
330            }
331    
332            public static Set<Annotation> getCandidateAnnotations(Annotation annotation, AnnotationSpanIndex spanIndex) {
333                    Set<Annotation> candidateAnnotations = new HashSet<Annotation>();
334                    String set = annotation.getSetName();
335                    String docID = annotation.getDocID();
336    
337                    Set<Annotation> overlappingAnnotations = spanIndex.getOverlappingAnnotations(annotation);
338                    for (Annotation overlappingAnnotation : overlappingAnnotations) {
339                            String candidateAnnotationSet = overlappingAnnotation.getSetName();
340                            if (!candidateAnnotationSet.equals(set)) {
341                                    String candidateDocID = overlappingAnnotation.getDocID();
342                                    if (candidateDocID.equals(docID)) {
343                                            candidateAnnotations.add(overlappingAnnotation);
344                                    }
345                            }
346                    }
347                    return candidateAnnotations;
348            }
349    
350            public static void printInstances(PrintStream out, Map<String, Set<Annotation>> sortedAnnotations,
351                            java.util.List<String> sortedTypes, Map<Annotation, String> annotationTexts,
352                            Map<Annotation, String> annotationTextNames, Map<Annotation, Set<Annotation>> comparisonAnnotations) {
353                    for (String type : sortedTypes) {
354                            out.println("<h2>" + type + "</h2>");
355                            Set<Annotation> typeAnnotations = sortedAnnotations.get(type);
356                            for (Annotation annotation : typeAnnotations) {
357                                    writeAnnotationTextSourceHTML(out, annotation, annotationTexts.get(annotation), annotationTextNames
358                                                    .get(annotation));
359                                    out.println("<ul><li>");
360                                    printAnnotationHTML(out, annotation, annotationTexts.get(annotation));
361    
362                                    Set<Annotation> comparisons = comparisonAnnotations.get(annotation);
363                                    if (comparisons != null) {
364                                            for (Annotation comparisonAnnotation : comparisons) {
365                                                    if (!comparisonAnnotation.equals(annotation)) {
366                                                            out.println("<li>");
367                                                            printAnnotationHTML(out, comparisonAnnotation, annotationTexts.get(comparisonAnnotation));
368                                                    }
369                                            }
370                                    }
371                                    out.println("</ul>");
372                            }
373                    }
374            }
375    
376            public static Set<Annotation> getSingleSet(Map<String, Set<Annotation>> annotations) {
377                    Set<Annotation> returnValues = new HashSet<Annotation>();
378                    for (String setName : annotations.keySet()) {
379                            returnValues.addAll(annotations.get(setName));
380                    }
381                    return returnValues;
382            }
383    
384            public static void initHTML(PrintStream html, String title, String link, String linkLabel, String description) {
385                    html.println("<html>");
386                    html.println("<head><title>" + title + "</title></head>");
387                    html.println("<body>");
388                    if (link != null)
389                            html.println("<a href=\"" + link + "\">" + linkLabel + "</a>");
390                    html.println("<h1>" + title + "</h1>");
391                    html.println(description);
392                    html.println("<hr>");
393            }
394    
395            public static Map<String, Set<Annotation>> sortByType(Set<String> types, Collection<Annotation> annotations) {
396                    Map<String, Set<Annotation>> sortedAnnotations = new HashMap<String, Set<Annotation>>();
397    
398                    for (String type : types) {
399                            sortedAnnotations.put(type, new HashSet<Annotation>());
400                    }
401                    for (Annotation annotation : annotations) {
402                            String type = annotation.getAnnotationClass();
403                            if (type != null)
404                                    sortedAnnotations.get(type).add(annotation);
405                    }
406                    return sortedAnnotations;
407            }
408    
409            static void writeAnnotationTextSourceHTML(PrintStream out, Annotation annotation, String annotationText,
410                            String annotationTextName) {
411                    StringBuffer html = new StringBuffer("<hr><p>");
412                    if (annotationTextName != null)
413                            html.append("Text source name = " + annotationTextName + "<p>");
414    
415                    if (annotationText != null) {
416                            java.util.List<Span> spans = annotation.getSpans();
417                            java.util.List<Span> modifiedSpans = new ArrayList<Span>(spans);
418    
419                            if (spans == null || spans.size() == 0) {
420                                    // spans should be the recursive spans
421                            }
422                            annotationText = shortenText(annotationText, modifiedSpans);
423    
424                            int mark = 0;
425    
426                            for (Span span : modifiedSpans) {
427                                    try {
428                                            html.append(annotationText.substring(mark, span.getStart()) + "<b>");
429                                            html.append(Span.substring(annotationText, span) + "</b>");
430                                            mark = span.getEnd();
431                                    } catch (StringIndexOutOfBoundsException sioobe) {
432                                            sioobe.printStackTrace();
433                                            System.out.println("annotationText=" + annotationText);
434                                            System.out.println("annotation = " + annotation.getSpans().get(0));
435                                            System.out.println("annotationTextName = " + annotationTextName);
436    
437                                    }
438    
439                            }
440                            if (mark < annotationText.length())
441                                    html.append(annotationText.substring(mark));
442                    }
443                    out.println(html.toString());
444            }
445    
446            private static String shortenText(String text, java.util.List<Span> spans) {
447                    int frontBuffer = 150;
448                    int endBuffer = 150;
449                    if (spans.size() > 0) {
450                            Span span = spans.get(0);
451                            int start = Math.max(0, span.getStart() - frontBuffer);
452                            int end = Math.min(text.length(), span.getEnd() + endBuffer);
453                            String substring = text.substring(start, end);
454    
455                            for (int i = 0; i < spans.size(); i++) {
456                                    span = spans.get(i);
457                                    Span offsetSpan = new Span(span.getStart() - start, span.getEnd() - start);
458                                    spans.set(i, offsetSpan);
459                            }
460                            return substring;
461                    }
462                    return text;
463            }
464    
465            static void printAnnotationHTML(PrintStream out, Annotation annotation, String annotationText) {
466                    StringBuffer html = new StringBuffer();
467    
468                    if (annotationText != null) {
469                            String coveredText = Annotation.getCoveredText(annotation, annotationText, " ... ");
470                            html.append(coveredText);
471                    }
472                    html.append("  " + annotation.toHTML());
473                    out.print(html.toString());
474            }
475    
476            static void printIntro(PrintStream html, IAA iaa, int numberOfDocs, String fileName, Matcher matcher) {
477                    html
478                                    .println("<p>For more detailed documentation on IAA please see the <a href=\"http://knowtator.sourceforge.net//iaa.shtml\">"
479                                                    + "IAA documentation</a>.");
480    
481                    html.println("<p>");
482                    html.println("<h2>" + iaa.getSetNames().size() + "-way IAA Results</h2>");
483                    html.println("IAA calculated on " + numberOfDocs + " documents.");
484                    html.println("<p><a href=\"" + fileName + ".dat\">tabular data</a>");
485                    html.println("<p>all annotations = matches + non-matches");
486                    html.println("<br> IAA = matches / all annotations");
487                    if (matcher.returnsTrivials()) {
488                            html.println("<br>stingy IAA = non-trivial matches / (matches + non-matches)");
489                            html.println("<br>respectable IAA = non-trivial matches / (non-trivial matches + non-matches)");
490                            html.println("<br>non-trivial IAA = non-trivial matches / (non-trivial matches + non-trivial non-matches)");
491                    }
492    
493            }
494    
495            static void printTitleRowForAllwayIAA(PrintStream html, Matcher matcher) {
496                    html.println("<table border=1><tr><td><b>Type</b></td>" + "<td><b>IAA</b></td>");
497                    if (matcher.returnsTrivials()) {
498                            html.println("<td><b>stingy IAA</b></td>" + "<td><b>respectable IAA</b></td>"
499                                            + "<td><b>non-trivial IAA</b></td>");
500                    }
501                    html.println("<td><b>matches</b></td>");
502                    if (matcher.returnsTrivials()) {
503                            html.println("<td><b>trivial matches</b></td>" + "<td><b>non-trivial matches</b></td>");
504                    }
505                    html.println("<td><b>non-matches</b></td>");
506                    if (matcher.returnsTrivials()) {
507                            html.println("<td><b>trivial non-matches</b></td>" + "<td><b>non-trivial non-matches</b></td>");
508                    }
509                    html.println("</tr>");
510            }
511    
512            public static String initHTML(String title, String description) {
513                    StringBuffer html = new StringBuffer();
514                    html.append("<html>\n");
515                    html.append("<head><title>" + title + "</title></head>\n");
516                    html.append("<body>\n");
517                    html.append("<h1>" + title + "</h1>\n");
518                    html.append(description);
519                    html
520                                    .append(" For more detailed documentation on IAA please see the <a href=\"http://knowtator.sourceforge.net//iaa.shtml\">IAA documentation</a>.\n");
521                    return html.toString();
522            }
523    
524            public static void printMatchData(PrintStream html, Set<String> sets, String fileName, File directory,
525                            Map<String, Set<Annotation>> allwayMatches, Map<Annotation, String> annotationTexts,
526                            Map<Annotation, String> annotationTextNames, Set<String> classes, IAA iaa) throws IOException
527    
528            {
529                    html.println("<h2>match data</h2>");
530                    html.println("<ul>");
531    
532                    Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets();
533                    java.util.List<String> sortedTypes = new ArrayList<String>(classes);
534                    Collections.sort(sortedTypes);
535    
536                    for (String set : sets) {
537                            String matchesFileName = fileName + ".matches." + set + ".html";
538                            html.println("<li><a href=\"" + matchesFileName + "\">matches for " + set + "</a></li>");
539                            PrintStream matchesStream = new PrintStream(new File(directory, matchesFileName));
540                            Set<Annotation> matches = allwayMatches.get(set);
541                            Map<String, Set<Annotation>> sortedMatches = IAA2HTML.sortByType(classes, matches);
542    
543                            matchesStream
544                                            .println(initHTML(
545                                                            "Matches for " + set,
546                                                            "Each annotation that was considered a match is shown in the text that it was found in.  The matching annotations from the other annotation sets are also shown."));
547                            IAA2HTML.printInstances(matchesStream, sortedMatches, sortedTypes, annotationTexts, annotationTextNames,
548                                            matchSets);
549                            matchesStream.flush();
550                            matchesStream.close();
551                    }
552                    html.println("</ul><hr>");
553            }
554    
555            public static void printNonmatchData(PrintStream html, Set<String> sets, String fileName, File directory,
556                            Map<String, Set<Annotation>> allwayNonmatches, AnnotationSpanIndex spanIndex,
557                            Map<Annotation, String> annotationTexts, Map<Annotation, String> annotationTextNames, Set<String> classes,
558                            IAA iaa) throws IOException {
559                    html.println("<h2>non-match data</h2>");
560                    html.println("<ul>");
561    
562                    java.util.List<String> sortedTypes = new ArrayList<String>(classes);
563                    Collections.sort(sortedTypes);
564    
565                    for (String set : sets) {
566                            String errorsFileName = fileName + ".nonmatches." + set + ".html";
567                            html.println("<li><a href=\"" + errorsFileName + "\">non-matches for " + set + "</a></li>");
568                            PrintStream errors = new PrintStream(new File(directory, errorsFileName));
569                            Set<Annotation> nonmatches = allwayNonmatches.get(set);
570                            Map<String, Set<Annotation>> sortedNonmatches = IAA2HTML.sortByType(classes, nonmatches);
571    
572                            Map<Annotation, Set<Annotation>> comparisonAnnotations = new HashMap<Annotation, Set<Annotation>>();
573                            for (Annotation nonmatch : nonmatches) {
574                                    comparisonAnnotations.put(nonmatch, IAA2HTML.getCandidateAnnotations(nonmatch, spanIndex));
575                            }
576    
577                            errors
578                                            .println(initHTML(
579                                                            "Non-matches for " + set,
580                                                            "Each annotation that was considered a non-match is shown in the text that it was found in.  Overlapping annotations from the other annotation sets are also shown."));
581                            IAA2HTML.printInstances(errors, sortedNonmatches, sortedTypes, annotationTexts, annotationTextNames,
582                                            comparisonAnnotations);
583                            errors.flush();
584                            errors.close();
585    
586                    }
587                    html.println("</ul><hr>");
588            }
589    
590            public static void printPairwiseAgreement(PrintStream html, Set<String> sets,
591                            Map<String, Map<String, Set<Annotation>>> pairwiseMatches,
592                            Map<String, Map<String, Set<Annotation>>> pairwiseNonmatches, NumberFormat percentageFormat) {
593                    html.println("<h2>Pair-wise agreement</h2>");
594                    html.println("<table border=1><tr><td><b>Gold standard set</b></td>" + "<td><b>compared set</b></td>"
595                                    + "<td><b>true positives</b></td>" + "<td><b>false positives</b></td>"
596                                    + "<td><b>false negatives</b></td>" + "<td><b>precision</b></td>" + "<td><b>recall</b></td>"
597                                    + "<td><b>F-score</b></td></tr>");
598    
599                    for (String setName : sets) {
600                            for (String setName2 : sets) {
601    
602                                    if (!setName.equals(setName2)) {
603                                            Set<Annotation> truePositives = pairwiseMatches.get(setName).get(setName2);
604                                            Set<Annotation> falseNegatives = pairwiseNonmatches.get(setName).get(setName2);
605                                            Set<Annotation> falsePositives = pairwiseNonmatches.get(setName2).get(setName);
606                                            double precision = (double) truePositives.size()
607                                                            / ((double) truePositives.size() + (double) falsePositives.size());
608                                            double recall = (double) truePositives.size()
609                                                            / ((double) truePositives.size() + (double) falseNegatives.size());
610                                            double f_score = ((double) 2 * precision * recall) / (recall + precision);
611    
612                                            html.println("<tr><td>" + setName + "</td>" + "<td>" + setName2 + "</td>" + "<td>"
613                                                            + truePositives.size() + "</td>" + "<td>" + falsePositives.size() + "</td>" + "<td>"
614                                                            + falseNegatives.size() + "</td>" + "<td>" + percentageFormat.format(precision) + "</td>"
615                                                            + "<td>" + percentageFormat.format(recall) + "</td>" + "<td>"
616                                                            + percentageFormat.format(f_score) + "</td></tr>");
617                                    }
618                            }
619                    }
620                    html.println("</table>");
621                    html.println("Precision and recall are given equal weight for the F-score.");
622            }
623    
624    }
625    
626    // for(String type : sortedTypes)
627    // {
628    // errors.println("<h2>"+type+"</h2>");
629    // Set<Annotation> typeNonmatches = sortedNonmatches.get(type);
630    // for(Annotation annotation : typeNonmatches)
631    // {
632    // String docID = annotation.getDocID();
633    // writeAnnotationTextSourceHTML(errors, annotation,
634    // annotationTexts.get(annotation), annotationTextNames.get(annotation));
635    // errors.println("<ul><li>");
636    // printAnnotationHTML(errors, annotation, annotationTexts.get(annotation));
637    //        
638    // Set<Annotation> candidateAnnotations =
639    // spanIndex.getOverlappingAnnotations(annotation);
640    // for(Annotation candidateAnnotation : candidateAnnotations)
641    // {
642    // String candidateAnnotationSet = candidateAnnotation.getSetName();
643    // if(!candidateAnnotationSet.equals(set))
644    // {
645    // String candidateDocID = candidateAnnotation.getDocID();
646    // if(candidateDocID.equals(docID))
647    // {
648    // errors.println("<li>");
649    // printAnnotationHTML(errors, candidateAnnotation,
650    // annotationTexts.get(candidateAnnotation));
651    // }
652    // }
653    // }
654    // errors.println("</ul>");
655    // }
656    // }
657    
658    // for(String set : sets)
659    // {
660    // String matchesFileName = fileName+".matches."+set+".html";
661    //html.println("<li><a href=\""+matchesFileName+"\">matches for "+set+"</a></li>"
662    // );
663    // PrintStream matchesStream = new PrintStream(new File(directory,
664    // matchesFileName));
665    // Set<Annotation> matches = allwayMatches.get(set);
666    // Map<String, Set<Annotation>> sortedMatches = sortByType(classes, matches);
667    // Map<Annotation, Set<Annotation>> matchSets = new HashMap<Annotation,
668    // Set<Annotation>>();
669    // for(Annotation nonmatch : nonmatches)
670    // {
671    // comparisonAnnotations.put(nonmatch, getCandidateAnnotations(nonmatch,
672    // spanIndex));
673    // }
674    //
675    // initHTML(errors, "Non-matches for "+set, fileName+".html", fileName,
676    // "Each annotation that was considered a non-match is shown in the text that it was found in.  Overlapping annotations from the other annotation sets are also shown."
677    // );
678    // printInstances(errors, sortedNonmatches, sortedTypes, annotationTexts,
679    // annotationTextNames, comparisonAnnotations);
680    // errors.flush(); errors.close();
681    // }
682    
683    // String matchesFileName = fileName+".matches.html";
684    // String trivialMatchesFileName = fileName+".trivial.matches.html";
685    // String nontrivialMatchesFileName = fileName+".nontrivial.matches.html";
686    // PrintStream matchesStream = new PrintStream(new File(directory,
687    // matchesFileName));
688    // PrintStream trivialMatchesStream = new PrintStream(new File(directory,
689    // trivialMatchesFileName));
690    // PrintStream nontrivialMatchesStream = new PrintStream(new File(directory,
691    // nontrivialMatchesFileName));
692    //
693    // html.println("<h2>match data</h2>");
694    // html.println("<ul><li><a href=\""+matchesFileName+"\">Matches</a></li>");
695    // if(matcher.returnsTrivials())
696    // { html.println("<li><a href=\""+trivialMatchesFileName+
697    // "\">Trivial matches</a></li>");
698    // html.println("<li><a href=\""+nontrivialMatchesFileName+
699    // "\">Non-trivial matches</a></li>");
700    // }
701    // html.println("</ul>");
702    //
703    // Map<Annotation, Set<Annotation>> matchSets = iaa.getAllwayMatchSets();
704    // initHTML(matchesStream, "Matches", fileName+".html", fileName,
705    // "Each annotation that was considered a match is shown in the text that it was found in.  Annotations from each of annotation sets are shown because there may be differences in the individual annotations if the match criteria ignored those differences.  Only one of the annotation's spans are bolded in the text."
706    // );
707    // if(matcher.returnsTrivials())
708    // {
709    // initHTML(trivialMatchesStream, "Trivial matches", fileName+".html", fileName,
710    // "Each annotation that was considered a match is shown in the text that it was found in.  Annotations from each of annotation sets are shown because there may be differences in the individual annotations if the match criteria ignored those differences.  Only one of the annotation's spans are bolded in the text."
711    // );
712    // initHTML(nontrivialMatchesStream, "Non-trivial matches", fileName+".html",
713    // fileName,
714    // "Each annotation that was considered a match is shown in the text that it was found in.  Annotations from each of annotation sets are shown because there may be differences in the individual annotations if the match criteria ignored those differences.  Only one of the annotation's span is bolded in the text."
715    // );
716    // }
717    //
718    // Set<Annotation> printedAnnotations = new HashSet<Annotation>();
719    // for(String type : sortedTypes)
720    // {
721    // matchesStream.println("<h2>"+type+"</h2>");
722    // trivialMatchesStream.println("<h2>"+type+"</h2>");
723    // nontrivialMatchesStream.println("<h2>"+type+"</h2>");
724    //  
725    // Set<Annotation> typeTrivialMatches = sortedAllwayTrivialMatches.get(type);
726    // Set<Annotation> typeNontrivialMatches =
727    // sortedAllwayNontrivialMatches.get(type);
728    // Set<Annotation> typeMatches = sortedAllwayMatches.get(type);
729    //  
730    // for(Annotation annotation : typeMatches)
731    // {
732    // if(printedAnnotations.contains(annotation)) continue;
733    // Set<Annotation> matchSet = matchSets.get(annotation);
734    //      
735    // writeAnnotationTextSourceHTML(matchesStream, annotation,
736    // annotationTexts.get(annotation), annotationTextNames.get(annotation));
737    // matchesStream.println("<p>");
738    // printAnnotationHTML(matchesStream, annotation,
739    // annotationTexts.get(annotation));
740    // if(typeTrivialMatches.contains(annotation) && matcher.returnsTrivials())
741    // {
742    // writeAnnotationTextSourceHTML(trivialMatchesStream, annotation,
743    // annotationTexts.get(annotation), annotationTextNames.get(annotation));
744    // trivialMatchesStream.println("<p>");
745    // printAnnotationHTML(trivialMatchesStream, annotation,
746    // annotationTexts.get(annotation));
747    // }
748    // else if(typeNontrivialMatches.contains(annotation) &&
749    // matcher.returnsTrivials())
750    // {
751    // writeAnnotationTextSourceHTML(nontrivialMatchesStream, annotation,
752    // annotationTexts.get(annotation), annotationTextNames.get(annotation));
753    // nontrivialMatchesStream.println("<p>");
754    // printAnnotationHTML(nontrivialMatchesStream, annotation,
755    // annotationTexts.get(annotation));
756    // }
757    //      
758    // printedAnnotations.add(annotation);
759    // for(Annotation matchedAnnotation : matchSet)
760    // {
761    // if(!matchedAnnotation.equals(annotation))
762    // {
763    // printAnnotationHTML(matchesStream, matchedAnnotation,
764    // annotationTexts.get(matchedAnnotation));
765    // if(typeTrivialMatches.contains(matchedAnnotation) &&
766    // matcher.returnsTrivials())
767    // {
768    // printAnnotationHTML(trivialMatchesStream, matchedAnnotation,
769    // annotationTexts.get(matchedAnnotation));
770    // }
771    // else if(typeNontrivialMatches.contains(matchedAnnotation) &&
772    // matcher.returnsTrivials())
773    // {
774    // printAnnotationHTML(nontrivialMatchesStream, matchedAnnotation,
775    // annotationTexts.get(matchedAnnotation));
776    // }
777    // printedAnnotations.add(matchedAnnotation);
778    // }
779    // }
780    // }
781    // }
782    // matchesStream.flush(); matchesStream.close();
783    // trivialMatchesStream.flush(); trivialMatchesStream.close();
784    // nontrivialMatchesStream.flush(); nontrivialMatchesStream.close();
785    //