001    /*
002     * The contents of this file are subject to the Mozilla Public
003     * License Version 1.1 (the "License"); you may not use this file
004     * except in compliance with the License. You may obtain a copy of
005     * the License at http://www.mozilla.org/MPL/
006     *
007     * Software distributed under the License is distributed on an "AS
008     * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009     * implied. See the License for the specific language governing
010     * rights and limitations under the License.
011     *
012     * The Original Code is Knowtator.
013     *
014     * The Initial Developer of the Original Code is University of Colorado.  
015     * Copyright (C) 2005 - 2008.  All Rights Reserved.
016     *
017     * Knowtator was developed by the Center for Computational Pharmacology
018     * (http://compbio.uchcs.edu) at the University of Colorado Health 
019     *  Sciences Center School of Medicine with support from the National 
020     *  Library of Medicine.  
021     *
022     * Current information about Knowtator can be obtained at 
023     * http://knowtator.sourceforge.net/
024     *
025     * Contributor(s):
026     *   Philip V. Ogren <philip@ogren.info> (Original Author)
027     */
028    package edu.uchsc.ccp.knowtator.util;
029    
030    import java.util.Collection;
031    import java.util.HashMap;
032    import java.util.HashSet;
033    import java.util.Map;
034    import java.util.Set;
035    
036    import org.apache.log4j.Logger;
037    
038    import edu.stanford.smi.protege.event.FrameAdapter;
039    import edu.stanford.smi.protege.event.FrameEvent;
040    import edu.stanford.smi.protege.model.Cls;
041    import edu.stanford.smi.protege.model.Frame;
042    import edu.stanford.smi.protege.model.KnowledgeBase;
043    import edu.stanford.smi.protege.model.SimpleInstance;
044    import edu.stanford.smi.protege.model.Slot;
045    import edu.uchsc.ccp.knowtator.AnnotationUtil;
046    import edu.uchsc.ccp.knowtator.FilterUtil;
047    import edu.uchsc.ccp.knowtator.KnowtatorManager;
048    import edu.uchsc.ccp.knowtator.KnowtatorProjectUtil;
049    import edu.uchsc.ccp.knowtator.MentionUtil;
050    
051    /**
052     * 
053     * @author Philip V. Ogren
054     * 
055     *        This class does not represent a consensus set of annotations. It
056     *         serves a class that helps manage a set of consensus annotations. When
057     *         the menu item 'Create Consensus Set' is run an instance of 'knowtator
058     *         set' will be created. Annotations in this set define a consensus set.
059     *         When redundant annotations exist in the set we want to consolidate
060     *         them. This is done by identifying that there is an annotation from
061     *         each of the individual annotators that is exactly the same. We can
062     *         then change the annotator to one of the annotations to the team
063     *         annotator and discard the others. This class facilitates the
064     *         consolidation of such redundant annotations - typically one text
065     *         source at a time.
066     */
067    
068    public class ConsensusSet {
069            Logger logger = Logger.getLogger(ConsensusSet.class);
070    
071            // A collection of all of the annotations in the ConsensusSet
072            Set<SimpleInstance> annotations;
073    
074            // A collection of all of the mentions in the ConsensusSet
075            Set<SimpleInstance> mentions;
076    
077            // contains each of the individual annotators in the ConsensusSet.
078            // These are obtained from the filter passed into the constructor.
079            Set<SimpleInstance> annotators;
080    
081            // The team annotator that consists of each of the annotators found
082            // in the annotators member variable (previous).
083            SimpleInstance teamAnnotator;
084    
085            // key is an annotator, value is a set of annotations by the annotator.
086            Map<SimpleInstance, Set<SimpleInstance>> annotatorAnnotations;
087    
088            // key is a class or instance mention that is a value of a slot mention. The
089            // value is a complex slot mention.
090            Map<SimpleInstance, Set<SimpleInstance>> slotValueToComplexMention;
091    
092            KnowtatorManager manager;
093    
094            AnnotationUtil annotationUtil;
095    
096            MentionUtil mentionUtil;
097    
098            KnowtatorProjectUtil kpu;
099    
100            KnowledgeBase kb;
101    
102            ConsensusFrameAdapter frameListener;
103    
104            boolean consolidatingAnnotations = false;
105    
106            /**
107             * @param annotations
108             *            some subset of the annotations that are found in a consensus
109             *            set. Typically, all of the annotations corresponding to a
110             *            single text source that pass through the consensusFilter
111             * @throws ConsensusException
112             */
113            /**
114             * 
115             */
116            public ConsensusSet(Set<SimpleInstance> annotations, SimpleInstance consensusSet, KnowtatorManager manager)
117                            throws ConsensusException {
118                    logger.debug("creating consensus set");
119                    this.annotations = annotations;
120                    this.manager = manager;
121                    this.annotationUtil = manager.getAnnotationUtil();
122                    this.mentionUtil = manager.getMentionUtil();
123                    this.kpu = manager.getKnowtatorProjectUtil();
124                    this.kb = manager.getKnowledgeBase();
125    
126                    this.teamAnnotator = (SimpleInstance) consensusSet.getOwnSlotValue(kpu.getConsensusSetTeamAnnotatorSlot());
127                    SimpleInstance individualFilter = (SimpleInstance) consensusSet.getOwnSlotValue(kpu
128                                    .getConsensusSetIndividualFilterSlot());
129                    this.annotators = new HashSet<SimpleInstance>(FilterUtil.getAnnotators(individualFilter));
130    
131                    /*
132                     * Throw some exceptions if the annotators provided by the filter do not
133                     * makes sense.
134                     */
135                    if (teamAnnotator == null)
136                            throw new ConsensusException("There is no team annotator for this consensus set."
137                                            + "\nPlease make sure that the provided filter has" + "\na single team annotator.");
138                    if (annotators.size() < 2)
139                            throw new ConsensusException("There are less than two annotators for this "
140                                            + "\nconsensus set.  Please make sure that the " + "\nprovided filter has two or more individual "
141                                            + "\nannotators.");
142    
143                    Collection<SimpleInstance> teamAnnotators = (Collection<SimpleInstance>) teamAnnotator.getOwnSlotValues(kpu
144                                    .getAnnotatorTeamMembersSlot());
145                    if (teamAnnotators.size() != annotators.size())
146                            throw new ConsensusException("The team annotator provided by the filter is "
147                                            + "\nnot consistent with the individual annotators " + "\nalso specified in the filter." + "\n "
148                                            + teamAnnotators.size() + " annotators specified " + "\nin team annotator and " + annotators.size()
149                                            + " " + "\nspecified by the filter passed into the consensus " + "\nset.");
150                    for (SimpleInstance tmAnnotator : teamAnnotators) {
151                            if (!annotators.contains(tmAnnotator))
152                                    throw new ConsensusException("The team annotator provided by the filter is "
153                                                    + "\nnot consistent with the individual annotators " + "\nalso specified in the filter"
154                                                    + "\n TeamAnnotator=" + teamAnnotator.getBrowserText() + "\nand missing annotator="
155                                                    + tmAnnotator.getBrowserText());
156                    }
157    
158                    // collect all of the mentions associated with the annotations and
159                    // populate annotatorAnnotations
160                    mentions = new HashSet<SimpleInstance>();
161                    annotatorAnnotations = new HashMap<SimpleInstance, Set<SimpleInstance>>();
162                    frameListener = new ConsensusFrameAdapter();
163                    for (SimpleInstance annotation : annotations) {
164                            annotation.addFrameListener(frameListener);
165                            SimpleInstance mention = annotationUtil.getMention(annotation);
166                            mentions.addAll(mentionUtil.getAllConnectedMentions(mention));
167    
168                            SimpleInstance annotator = annotationUtil.getAnnotator(annotation);
169                            if (!annotators.contains(annotator) && !annotator.equals(teamAnnotator))
170                                    throw new ConsensusException(
171                                                    "An annotation in this set was created by an annotator other than the annotators specified by the filter or the team annotators"
172                                                                    + "\n The offending annotations is \""
173                                                                    + annotation.getBrowserText()
174                                                                    + "\" with annotator = " + annotator.getBrowserText());
175                            if (!annotatorAnnotations.containsKey(annotator)) {
176                                    annotatorAnnotations.put(annotator, new HashSet<SimpleInstance>());
177                            }
178                            annotatorAnnotations.get(annotator).add(annotation);
179                    }
180    
181                    slotValueToComplexMention = new HashMap<SimpleInstance, Set<SimpleInstance>>();
182                    for (SimpleInstance mention : mentions) {
183                            mention.addFrameListener(frameListener);
184    
185                            if (mentionUtil.isComplexSlotMention(mention)) {
186                                    java.util.List<Object> slotValues = mentionUtil.getSlotMentionValues(mention);
187                                    if (slotValues.size() > 0) {
188                                            for (Object slotValue : slotValues) {
189                                                    SimpleInstance slotValueInstance = (SimpleInstance) slotValue;
190                                                    if (!slotValueToComplexMention.containsKey(slotValueInstance))
191                                                            slotValueToComplexMention.put(slotValueInstance, new HashSet<SimpleInstance>());
192                                                    slotValueToComplexMention.get(slotValueInstance).add(mention);
193                                            }
194                                    }
195                            }
196                    }
197            }
198    
199            /**
200             * This method finds all complex slot mentions that have originalMention as
201             * one of its values and 'replaces' it with newMention. The originalMention
202             * will always be removed from the set of slot values of the complex slot
203             * mention. However, newMention will only be added to the set of slot values
204             * only if it is not already there.
205             * 
206             * @param originalMention
207             * @param newMention
208             */
209    
210            private void replaceSlotValue(SimpleInstance originalMention, SimpleInstance newMention) {
211                    if (slotValueToComplexMention.containsKey(originalMention)) {
212                            Set<SimpleInstance> complexMentions = slotValueToComplexMention.get(originalMention);
213                            for (SimpleInstance complexMention : complexMentions) {
214                                    java.util.List<Object> complexMentionValues = mentionUtil.getSlotMentionValues(complexMention);
215                                    if (!complexMentionValues.contains(newMention))
216                                            mentionUtil.addValueToSlotMention(complexMention, newMention);
217                                    mentionUtil.removeValueFromSlotMention(complexMention, originalMention);
218                            }
219                    }
220            }
221    
222            /**
223             * This method consolidates two anntotations into one. It does this by
224             * removing one annotation from the consensus set (the redundantAnnotation)
225             * and making the other as annotated by the 'Team Annotator'. One of the
226             * important things that is done before the redundant annotation is deleted
227             * is to have the complex slot mentions whose value is the redundant
228             * annotation change their value to the consensusAnnotation. This is done
229             * with a call to replaceSlotValue.
230             * 
231             * @param consensusAnnotation
232             *            the annotation that will stick around
233             * @param redundantAnnotation
234             *            the annotation that is going away
235             * @see #replaceSlotValue(SimpleInstance, SimpleInstance)
236             */
237            public void consolidateAnnotations(SimpleInstance consensusAnnotation, SimpleInstance redundantAnnotation) {
238                    // set annotator of consensus annotation to the team annotator.
239                    consensusAnnotation.setDirectOwnSlotValue(kpu.getAnnotationAnnotatorSlot(), teamAnnotator);
240    
241                    // remove redundantAnnotation from ConsensusSet member variables
242                    annotations.remove(redundantAnnotation);
243                    SimpleInstance annotator = annotationUtil.getAnnotator(redundantAnnotation);
244                    if (annotator != null)
245                            annotatorAnnotations.get(annotator).remove(redundantAnnotation);
246    
247                    // replace slot mentions/values that correspond to the redundant
248                    // annotation with the slot mention/value of the consensus annotation
249                    SimpleInstance consensusMention = annotationUtil.getMention(consensusAnnotation);
250                    SimpleInstance redundantMention = annotationUtil.getMention(redundantAnnotation);
251                    replaceSlotValue(redundantMention, consensusMention);
252                    // remove redundantMention from ConsensusSet member variables
253                    mentions.remove(redundantMention);
254                    slotValueToComplexMention.remove(redundantMention);
255    
256                    redundantMention.removeFrameListener(frameListener);
257                    redundantAnnotation.removeFrameListener(frameListener);
258                    manager.deleteAnnotation(redundantAnnotation);
259            }
260    
261            /**
262             * This method finds annotations that are 'redundant' in the consensus set.
263             * An annotation is considered redundant if it is created by an individual
264             * annotator and it is the same as an annotation from each of the other
265             * annotators. Two annotations are the same if they have the same same span
266             * and the corresponding mentions of the annotations are identical.
267             * 
268             * @param annotation
269             *            we are looking for annotations that are exactly like this one.
270             * @return the annotations that are the same as the passed in annotation.
271             *         Does not contain the passed in annotation. Will not return null;
272             */
273            private Set<SimpleInstance> getRedundantAnnotations(SimpleInstance annotation) {
274                    SimpleInstance mention = annotationUtil.getMention(annotation);
275                    SimpleInstance annotator = annotationUtil.getAnnotator(annotation);
276                    Set<SimpleInstance> matchedAnnotations = new HashSet<SimpleInstance>();
277    
278                    for (SimpleInstance compareAnnotator : annotators) {
279                            if (!annotator.equals(compareAnnotator) && !annotator.equals(teamAnnotator)) {
280                                    boolean annotatorMatched = false;
281                                    Set<SimpleInstance> candidateAnnotations = annotatorAnnotations.get(compareAnnotator);
282                                    if (candidateAnnotations == null) {
283                                            matchedAnnotations.clear();
284                                            return matchedAnnotations;
285                                    }
286                                    for (SimpleInstance candidateAnnotation : candidateAnnotations) {
287                                            if (candidateAnnotation.isDeleted() || candidateAnnotation.isBeingDeleted())
288                                                    continue;
289                                            if (strictMatch(annotation, candidateAnnotation, annotationUtil, mentionUtil)) {
290                                                    SimpleInstance matchMention = annotationUtil.getMention(candidateAnnotation);
291                                                    if (mentionUtil.equals(mention, matchMention, true)) {
292                                                            matchedAnnotations.add(candidateAnnotation);
293                                                            annotatorMatched = true;
294                                                            // add break here if you want to make sure that
295                                                            // annotation matches only one annotation from each
296                                                            // annotator.
297                                                            // It may be that the individual has created
298                                                            // redundant annotations.
299                                                    }
300                                            }
301                                    }
302                                    if (!annotatorMatched) {
303                                            matchedAnnotations.clear();
304                                            return matchedAnnotations;
305                                    }
306                            }
307                    }
308                    return matchedAnnotations;
309            }
310    
311            private boolean strictMatch(SimpleInstance annotation1, SimpleInstance annotation2, AnnotationUtil annotationUtil,
312                            MentionUtil mentionUtil) {
313                    SimpleInstance mention1 = annotationUtil.getMention(annotation1);
314                    Cls mentionType1 = mentionUtil.getMentionCls(mention1);
315                    SimpleInstance mention2 = annotationUtil.getMention(annotation2);
316                    Cls mentionType2 = mentionUtil.getMentionCls(mention2);
317                    boolean typesMatch = false;
318                    if (mentionType1 == null && mentionType2 == null)
319                            typesMatch = true;
320                    else if (mentionType1 != null && mentionType1.equals(mentionType2))
321                            typesMatch = true;
322    
323                    if (typesMatch && annotationUtil.compareSpans(annotation1, annotation2)) {
324                            return true;
325                    }
326                    return false;
327            }
328    
329            public void consolidateAnnotations() {
330                    try {
331                            consolidatingAnnotations = true;
332    
333                            Set<SimpleInstance> excludeAnnotations = new HashSet<SimpleInstance>();
334                            // key is an annotation that we will keep, the values are redundant
335                            // copies
336                            Map<SimpleInstance, Set<SimpleInstance>> consolidatedAnnotations = new HashMap<SimpleInstance, Set<SimpleInstance>>();
337                            // key is a mention that can be removed/replaced with the value
338                            Map<SimpleInstance, SimpleInstance> consolidatedMentions = new HashMap<SimpleInstance, SimpleInstance>();
339    
340                            // loop through all the annotations created for the consensus set
341                            // and look for identical/redundant annotations to remove.
342                            // this loop will populate consolidatedAnnotations and
343                            // consolidatedMentions
344                            for (SimpleInstance annotation : annotations) {
345                                    if (annotation.isDeleted() || annotation.isBeingDeleted())
346                                            continue;
347                                    SimpleInstance annotator = annotationUtil.getAnnotator(annotation);
348                                    if (annotator.equals(teamAnnotator))
349                                            continue;
350                                    if (!excludeAnnotations.contains(annotation)) {
351                                            Set<SimpleInstance> redundantAnnotations = getRedundantAnnotations(annotation);
352                                            if (redundantAnnotations == null)
353                                                    continue;
354                                            if (redundantAnnotations.size() > 0) {
355                                                    consolidatedAnnotations.put(annotation, redundantAnnotations);
356                                                    excludeAnnotations.addAll(redundantAnnotations);
357                                                    SimpleInstance mention = annotationUtil.getMention(annotation);
358                                                    for (SimpleInstance redundantAnnotation : redundantAnnotations) {
359                                                            SimpleInstance redundantMention = annotationUtil.getMention(redundantAnnotation);
360                                                            consolidatedMentions.put(redundantMention, mention);
361                                                    }
362                                            }
363                                    }
364                            }
365    
366                            for (SimpleInstance redundantMention : consolidatedMentions.keySet()) {
367                                    replaceSlotValue(redundantMention, consolidatedMentions.get(redundantMention));
368                            }
369    
370                            for (SimpleInstance consensusAnnotation : consolidatedAnnotations.keySet()) {
371                                    Set<SimpleInstance> redundantAnnotations = consolidatedAnnotations.get(consensusAnnotation);
372                                    for (SimpleInstance redundantAnnotation : redundantAnnotations) {
373                                            consolidateAnnotations(consensusAnnotation, redundantAnnotation);
374                                    }
375                            }
376                            consolidatingAnnotations = false;
377                    } catch (NullPointerException npe) {
378                            npe.printStackTrace();
379                    }
380                    manager.updateCurrentAnnotations();
381            }
382    
383            protected void finalize() {
384                    for (SimpleInstance mention : mentions) {
385                            mention.removeFrameListener(frameListener);
386                    }
387            }
388    
389            public void destroy() {
390                    for (SimpleInstance mention : mentions) {
391                            mention.removeFrameListener(frameListener);
392                    }
393                    for (SimpleInstance annotation : annotations) {
394                            annotation.removeFrameListener(frameListener);
395                    }
396                    annotations.clear();
397                    mentions.clear();
398                    annotators.clear();
399                    annotatorAnnotations.clear();
400                    slotValueToComplexMention.clear();
401            }
402    
403            public class ConsensusFrameAdapter extends FrameAdapter {
404                    public void ownSlotValueChanged(FrameEvent event) {
405    
406                            if (!consolidatingAnnotations) {
407                                    Frame eventFrame = event.getFrame();
408    
409                                    // TODO looks like I need to also check for mentions here.
410                                    if (eventFrame instanceof SimpleInstance) {
411                                            SimpleInstance eventInstance = (SimpleInstance) eventFrame;
412                                            if (annotationUtil.isAnnotation(eventInstance)) {
413                                                    SimpleInstance annotator = annotationUtil.getAnnotator(eventInstance);
414                                                    if (annotator != null && annotator.equals(teamAnnotator))
415                                                            return;
416                                            }
417                                    }
418                                    Slot eventSlot = event.getSlot();
419                                    if (eventSlot == null)
420                                            return;
421                                    if (eventSlot.equals(kpu.getMentionSlotSlot()) || eventSlot.equals(kpu.getMentionSlotValueSlot())
422                                                    || eventSlot.equals(kpu.getMentionClassSlot())
423                                                    || eventSlot.equals(kpu.getMentionInstanceSlot())
424                                                    || eventSlot.equals(kpu.getAnnotatedMentionSlot())
425                                                    || eventSlot.equals(kpu.getAnnotationSpanSlot()))
426                                            consolidateAnnotations();
427                                    return;
428                            }
429                    }
430    
431                    public void deleted(FrameEvent event) {
432                            // System.out.println("deleted");
433                            // Frame eventFrame = event.getFrame();
434                            // System.out.println("eventFrame="+eventFrame);
435                            // why doesn't this spit out statements?
436                    }
437            }
438    
439            public SimpleInstance getTeamAnnotator() {
440                    return teamAnnotator;
441            }
442    }
443    
444    /*
445     * Here we find the team annotator and the individual annotators and initialize
446     * the variables teamAnnotator and annotators, respectively. If the passed in
447     * filter does not have the appropriate annotators, then an exception will be
448     * thrown.
449     */
450    // Set<SimpleInstance> filterAnnotators =
451    // FilterUtil.getAnnotators(consensusFilter);
452    // this.annotators = new HashSet<SimpleInstance>();
453    // for (SimpleInstance filterAnnotator : filterAnnotators)
454    // {
455    // if(AnnotatorUtil.isTeamAnnotator(filterAnnotator))
456    // {
457    // this.teamAnnotator = filterAnnotator;
458    // }
459    // else
460    // annotators.add(filterAnnotator);
461    // }