001 /*
002 * The contents of this file are subject to the Mozilla Public
003 * License Version 1.1 (the "License"); you may not use this file
004 * except in compliance with the License. You may obtain a copy of
005 * the License at http://www.mozilla.org/MPL/
006 *
007 * Software distributed under the License is distributed on an "AS
008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009 * implied. See the License for the specific language governing
010 * rights and limitations under the License.
011 *
012 * The Original Code is Knowtator.
013 *
014 * The Initial Developer of the Original Code is University of Colorado.
015 * Copyright (C) 2005 - 2008. All Rights Reserved.
016 *
017 * Knowtator was developed by the Center for Computational Pharmacology
018 * (http://compbio.uchcs.edu) at the University of Colorado Health
019 * Sciences Center School of Medicine with support from the National
020 * Library of Medicine.
021 *
022 * Current information about Knowtator can be obtained at
023 * http://knowtator.sourceforge.net/
024 *
025 * Contributor(s):
026 * Philip V. Ogren <philip@ogren.info> (Original Author)
027 */
028 package edu.uchsc.ccp.knowtator.util;
029
030 import java.util.Collection;
031 import java.util.HashMap;
032 import java.util.HashSet;
033 import java.util.Map;
034 import java.util.Set;
035
036 import org.apache.log4j.Logger;
037
038 import edu.stanford.smi.protege.event.FrameAdapter;
039 import edu.stanford.smi.protege.event.FrameEvent;
040 import edu.stanford.smi.protege.model.Cls;
041 import edu.stanford.smi.protege.model.Frame;
042 import edu.stanford.smi.protege.model.KnowledgeBase;
043 import edu.stanford.smi.protege.model.SimpleInstance;
044 import edu.stanford.smi.protege.model.Slot;
045 import edu.uchsc.ccp.knowtator.AnnotationUtil;
046 import edu.uchsc.ccp.knowtator.FilterUtil;
047 import edu.uchsc.ccp.knowtator.KnowtatorManager;
048 import edu.uchsc.ccp.knowtator.KnowtatorProjectUtil;
049 import edu.uchsc.ccp.knowtator.MentionUtil;
050
/**
 * Helps manage a set of consensus annotations; this class does not itself
 * represent a consensus set of annotations. When the menu item 'Create
 * Consensus Set' is run, an instance of 'knowtator set' is created, and the
 * annotations in that set define the consensus set. When redundant
 * annotations exist in the set we want to consolidate them. This is done by
 * identifying that there is an annotation from each of the individual
 * annotators that is exactly the same. We can then change the annotator of
 * one of those annotations to the team annotator and discard the others.
 * This class facilitates the consolidation of such redundant annotations -
 * typically one text source at a time.
 *
 * @author Philip V. Ogren
 */
067
068 public class ConsensusSet {
069 Logger logger = Logger.getLogger(ConsensusSet.class);
070
071 // A collection of all of the annotations in the ConsensusSet
072 Set<SimpleInstance> annotations;
073
074 // A collection of all of the mentions in the ConsensusSet
075 Set<SimpleInstance> mentions;
076
077 // contains each of the individual annotators in the ConsensusSet.
078 // These are obtained from the filter passed into the constructor.
079 Set<SimpleInstance> annotators;
080
081 // The team annotator that consists of each of the annotators found
082 // in the annotators member variable (previous).
083 SimpleInstance teamAnnotator;
084
085 // key is an annotator, value is a set of annotations by the annotator.
086 Map<SimpleInstance, Set<SimpleInstance>> annotatorAnnotations;
087
088 // key is a class or instance mention that is a value of a slot mention. The
089 // value is a complex slot mention.
090 Map<SimpleInstance, Set<SimpleInstance>> slotValueToComplexMention;
091
092 KnowtatorManager manager;
093
094 AnnotationUtil annotationUtil;
095
096 MentionUtil mentionUtil;
097
098 KnowtatorProjectUtil kpu;
099
100 KnowledgeBase kb;
101
102 ConsensusFrameAdapter frameListener;
103
104 boolean consolidatingAnnotations = false;
105
106 /**
107 * @param annotations
108 * some subset of the annotations that are found in a consensus
109 * set. Typically, all of the annotations corresponding to a
110 * single text source that pass through the consensusFilter
111 * @throws ConsensusException
112 */
113 /**
114 *
115 */
116 public ConsensusSet(Set<SimpleInstance> annotations, SimpleInstance consensusSet, KnowtatorManager manager)
117 throws ConsensusException {
118 logger.debug("creating consensus set");
119 this.annotations = annotations;
120 this.manager = manager;
121 this.annotationUtil = manager.getAnnotationUtil();
122 this.mentionUtil = manager.getMentionUtil();
123 this.kpu = manager.getKnowtatorProjectUtil();
124 this.kb = manager.getKnowledgeBase();
125
126 this.teamAnnotator = (SimpleInstance) consensusSet.getOwnSlotValue(kpu.getConsensusSetTeamAnnotatorSlot());
127 SimpleInstance individualFilter = (SimpleInstance) consensusSet.getOwnSlotValue(kpu
128 .getConsensusSetIndividualFilterSlot());
129 this.annotators = new HashSet<SimpleInstance>(FilterUtil.getAnnotators(individualFilter));
130
131 /*
132 * Throw some exceptions if the annotators provided by the filter do not
133 * makes sense.
134 */
135 if (teamAnnotator == null)
136 throw new ConsensusException("There is no team annotator for this consensus set."
137 + "\nPlease make sure that the provided filter has" + "\na single team annotator.");
138 if (annotators.size() < 2)
139 throw new ConsensusException("There are less than two annotators for this "
140 + "\nconsensus set. Please make sure that the " + "\nprovided filter has two or more individual "
141 + "\nannotators.");
142
143 Collection<SimpleInstance> teamAnnotators = (Collection<SimpleInstance>) teamAnnotator.getOwnSlotValues(kpu
144 .getAnnotatorTeamMembersSlot());
145 if (teamAnnotators.size() != annotators.size())
146 throw new ConsensusException("The team annotator provided by the filter is "
147 + "\nnot consistent with the individual annotators " + "\nalso specified in the filter." + "\n "
148 + teamAnnotators.size() + " annotators specified " + "\nin team annotator and " + annotators.size()
149 + " " + "\nspecified by the filter passed into the consensus " + "\nset.");
150 for (SimpleInstance tmAnnotator : teamAnnotators) {
151 if (!annotators.contains(tmAnnotator))
152 throw new ConsensusException("The team annotator provided by the filter is "
153 + "\nnot consistent with the individual annotators " + "\nalso specified in the filter"
154 + "\n TeamAnnotator=" + teamAnnotator.getBrowserText() + "\nand missing annotator="
155 + tmAnnotator.getBrowserText());
156 }
157
158 // collect all of the mentions associated with the annotations and
159 // populate annotatorAnnotations
160 mentions = new HashSet<SimpleInstance>();
161 annotatorAnnotations = new HashMap<SimpleInstance, Set<SimpleInstance>>();
162 frameListener = new ConsensusFrameAdapter();
163 for (SimpleInstance annotation : annotations) {
164 annotation.addFrameListener(frameListener);
165 SimpleInstance mention = annotationUtil.getMention(annotation);
166 mentions.addAll(mentionUtil.getAllConnectedMentions(mention));
167
168 SimpleInstance annotator = annotationUtil.getAnnotator(annotation);
169 if (!annotators.contains(annotator) && !annotator.equals(teamAnnotator))
170 throw new ConsensusException(
171 "An annotation in this set was created by an annotator other than the annotators specified by the filter or the team annotators"
172 + "\n The offending annotations is \""
173 + annotation.getBrowserText()
174 + "\" with annotator = " + annotator.getBrowserText());
175 if (!annotatorAnnotations.containsKey(annotator)) {
176 annotatorAnnotations.put(annotator, new HashSet<SimpleInstance>());
177 }
178 annotatorAnnotations.get(annotator).add(annotation);
179 }
180
181 slotValueToComplexMention = new HashMap<SimpleInstance, Set<SimpleInstance>>();
182 for (SimpleInstance mention : mentions) {
183 mention.addFrameListener(frameListener);
184
185 if (mentionUtil.isComplexSlotMention(mention)) {
186 java.util.List<Object> slotValues = mentionUtil.getSlotMentionValues(mention);
187 if (slotValues.size() > 0) {
188 for (Object slotValue : slotValues) {
189 SimpleInstance slotValueInstance = (SimpleInstance) slotValue;
190 if (!slotValueToComplexMention.containsKey(slotValueInstance))
191 slotValueToComplexMention.put(slotValueInstance, new HashSet<SimpleInstance>());
192 slotValueToComplexMention.get(slotValueInstance).add(mention);
193 }
194 }
195 }
196 }
197 }
198
199 /**
200 * This method finds all complex slot mentions that have originalMention as
201 * one of its values and 'replaces' it with newMention. The originalMention
202 * will always be removed from the set of slot values of the complex slot
203 * mention. However, newMention will only be added to the set of slot values
204 * only if it is not already there.
205 *
206 * @param originalMention
207 * @param newMention
208 */
209
210 private void replaceSlotValue(SimpleInstance originalMention, SimpleInstance newMention) {
211 if (slotValueToComplexMention.containsKey(originalMention)) {
212 Set<SimpleInstance> complexMentions = slotValueToComplexMention.get(originalMention);
213 for (SimpleInstance complexMention : complexMentions) {
214 java.util.List<Object> complexMentionValues = mentionUtil.getSlotMentionValues(complexMention);
215 if (!complexMentionValues.contains(newMention))
216 mentionUtil.addValueToSlotMention(complexMention, newMention);
217 mentionUtil.removeValueFromSlotMention(complexMention, originalMention);
218 }
219 }
220 }
221
222 /**
223 * This method consolidates two anntotations into one. It does this by
224 * removing one annotation from the consensus set (the redundantAnnotation)
225 * and making the other as annotated by the 'Team Annotator'. One of the
226 * important things that is done before the redundant annotation is deleted
227 * is to have the complex slot mentions whose value is the redundant
228 * annotation change their value to the consensusAnnotation. This is done
229 * with a call to replaceSlotValue.
230 *
231 * @param consensusAnnotation
232 * the annotation that will stick around
233 * @param redundantAnnotation
234 * the annotation that is going away
235 * @see #replaceSlotValue(SimpleInstance, SimpleInstance)
236 */
237 public void consolidateAnnotations(SimpleInstance consensusAnnotation, SimpleInstance redundantAnnotation) {
238 // set annotator of consensus annotation to the team annotator.
239 consensusAnnotation.setDirectOwnSlotValue(kpu.getAnnotationAnnotatorSlot(), teamAnnotator);
240
241 // remove redundantAnnotation from ConsensusSet member variables
242 annotations.remove(redundantAnnotation);
243 SimpleInstance annotator = annotationUtil.getAnnotator(redundantAnnotation);
244 if (annotator != null)
245 annotatorAnnotations.get(annotator).remove(redundantAnnotation);
246
247 // replace slot mentions/values that correspond to the redundant
248 // annotation with the slot mention/value of the consensus annotation
249 SimpleInstance consensusMention = annotationUtil.getMention(consensusAnnotation);
250 SimpleInstance redundantMention = annotationUtil.getMention(redundantAnnotation);
251 replaceSlotValue(redundantMention, consensusMention);
252 // remove redundantMention from ConsensusSet member variables
253 mentions.remove(redundantMention);
254 slotValueToComplexMention.remove(redundantMention);
255
256 redundantMention.removeFrameListener(frameListener);
257 redundantAnnotation.removeFrameListener(frameListener);
258 manager.deleteAnnotation(redundantAnnotation);
259 }
260
261 /**
262 * This method finds annotations that are 'redundant' in the consensus set.
263 * An annotation is considered redundant if it is created by an individual
264 * annotator and it is the same as an annotation from each of the other
265 * annotators. Two annotations are the same if they have the same same span
266 * and the corresponding mentions of the annotations are identical.
267 *
268 * @param annotation
269 * we are looking for annotations that are exactly like this one.
270 * @return the annotations that are the same as the passed in annotation.
271 * Does not contain the passed in annotation. Will not return null;
272 */
273 private Set<SimpleInstance> getRedundantAnnotations(SimpleInstance annotation) {
274 SimpleInstance mention = annotationUtil.getMention(annotation);
275 SimpleInstance annotator = annotationUtil.getAnnotator(annotation);
276 Set<SimpleInstance> matchedAnnotations = new HashSet<SimpleInstance>();
277
278 for (SimpleInstance compareAnnotator : annotators) {
279 if (!annotator.equals(compareAnnotator) && !annotator.equals(teamAnnotator)) {
280 boolean annotatorMatched = false;
281 Set<SimpleInstance> candidateAnnotations = annotatorAnnotations.get(compareAnnotator);
282 if (candidateAnnotations == null) {
283 matchedAnnotations.clear();
284 return matchedAnnotations;
285 }
286 for (SimpleInstance candidateAnnotation : candidateAnnotations) {
287 if (candidateAnnotation.isDeleted() || candidateAnnotation.isBeingDeleted())
288 continue;
289 if (strictMatch(annotation, candidateAnnotation, annotationUtil, mentionUtil)) {
290 SimpleInstance matchMention = annotationUtil.getMention(candidateAnnotation);
291 if (mentionUtil.equals(mention, matchMention, true)) {
292 matchedAnnotations.add(candidateAnnotation);
293 annotatorMatched = true;
294 // add break here if you want to make sure that
295 // annotation matches only one annotation from each
296 // annotator.
297 // It may be that the individual has created
298 // redundant annotations.
299 }
300 }
301 }
302 if (!annotatorMatched) {
303 matchedAnnotations.clear();
304 return matchedAnnotations;
305 }
306 }
307 }
308 return matchedAnnotations;
309 }
310
311 private boolean strictMatch(SimpleInstance annotation1, SimpleInstance annotation2, AnnotationUtil annotationUtil,
312 MentionUtil mentionUtil) {
313 SimpleInstance mention1 = annotationUtil.getMention(annotation1);
314 Cls mentionType1 = mentionUtil.getMentionCls(mention1);
315 SimpleInstance mention2 = annotationUtil.getMention(annotation2);
316 Cls mentionType2 = mentionUtil.getMentionCls(mention2);
317 boolean typesMatch = false;
318 if (mentionType1 == null && mentionType2 == null)
319 typesMatch = true;
320 else if (mentionType1 != null && mentionType1.equals(mentionType2))
321 typesMatch = true;
322
323 if (typesMatch && annotationUtil.compareSpans(annotation1, annotation2)) {
324 return true;
325 }
326 return false;
327 }
328
329 public void consolidateAnnotations() {
330 try {
331 consolidatingAnnotations = true;
332
333 Set<SimpleInstance> excludeAnnotations = new HashSet<SimpleInstance>();
334 // key is an annotation that we will keep, the values are redundant
335 // copies
336 Map<SimpleInstance, Set<SimpleInstance>> consolidatedAnnotations = new HashMap<SimpleInstance, Set<SimpleInstance>>();
337 // key is a mention that can be removed/replaced with the value
338 Map<SimpleInstance, SimpleInstance> consolidatedMentions = new HashMap<SimpleInstance, SimpleInstance>();
339
340 // loop through all the annotations created for the consensus set
341 // and look for identical/redundant annotations to remove.
342 // this loop will populate consolidatedAnnotations and
343 // consolidatedMentions
344 for (SimpleInstance annotation : annotations) {
345 if (annotation.isDeleted() || annotation.isBeingDeleted())
346 continue;
347 SimpleInstance annotator = annotationUtil.getAnnotator(annotation);
348 if (annotator.equals(teamAnnotator))
349 continue;
350 if (!excludeAnnotations.contains(annotation)) {
351 Set<SimpleInstance> redundantAnnotations = getRedundantAnnotations(annotation);
352 if (redundantAnnotations == null)
353 continue;
354 if (redundantAnnotations.size() > 0) {
355 consolidatedAnnotations.put(annotation, redundantAnnotations);
356 excludeAnnotations.addAll(redundantAnnotations);
357 SimpleInstance mention = annotationUtil.getMention(annotation);
358 for (SimpleInstance redundantAnnotation : redundantAnnotations) {
359 SimpleInstance redundantMention = annotationUtil.getMention(redundantAnnotation);
360 consolidatedMentions.put(redundantMention, mention);
361 }
362 }
363 }
364 }
365
366 for (SimpleInstance redundantMention : consolidatedMentions.keySet()) {
367 replaceSlotValue(redundantMention, consolidatedMentions.get(redundantMention));
368 }
369
370 for (SimpleInstance consensusAnnotation : consolidatedAnnotations.keySet()) {
371 Set<SimpleInstance> redundantAnnotations = consolidatedAnnotations.get(consensusAnnotation);
372 for (SimpleInstance redundantAnnotation : redundantAnnotations) {
373 consolidateAnnotations(consensusAnnotation, redundantAnnotation);
374 }
375 }
376 consolidatingAnnotations = false;
377 } catch (NullPointerException npe) {
378 npe.printStackTrace();
379 }
380 manager.updateCurrentAnnotations();
381 }
382
383 protected void finalize() {
384 for (SimpleInstance mention : mentions) {
385 mention.removeFrameListener(frameListener);
386 }
387 }
388
389 public void destroy() {
390 for (SimpleInstance mention : mentions) {
391 mention.removeFrameListener(frameListener);
392 }
393 for (SimpleInstance annotation : annotations) {
394 annotation.removeFrameListener(frameListener);
395 }
396 annotations.clear();
397 mentions.clear();
398 annotators.clear();
399 annotatorAnnotations.clear();
400 slotValueToComplexMention.clear();
401 }
402
403 public class ConsensusFrameAdapter extends FrameAdapter {
404 public void ownSlotValueChanged(FrameEvent event) {
405
406 if (!consolidatingAnnotations) {
407 Frame eventFrame = event.getFrame();
408
409 // TODO looks like I need to also check for mentions here.
410 if (eventFrame instanceof SimpleInstance) {
411 SimpleInstance eventInstance = (SimpleInstance) eventFrame;
412 if (annotationUtil.isAnnotation(eventInstance)) {
413 SimpleInstance annotator = annotationUtil.getAnnotator(eventInstance);
414 if (annotator != null && annotator.equals(teamAnnotator))
415 return;
416 }
417 }
418 Slot eventSlot = event.getSlot();
419 if (eventSlot == null)
420 return;
421 if (eventSlot.equals(kpu.getMentionSlotSlot()) || eventSlot.equals(kpu.getMentionSlotValueSlot())
422 || eventSlot.equals(kpu.getMentionClassSlot())
423 || eventSlot.equals(kpu.getMentionInstanceSlot())
424 || eventSlot.equals(kpu.getAnnotatedMentionSlot())
425 || eventSlot.equals(kpu.getAnnotationSpanSlot()))
426 consolidateAnnotations();
427 return;
428 }
429 }
430
431 public void deleted(FrameEvent event) {
432 // System.out.println("deleted");
433 // Frame eventFrame = event.getFrame();
434 // System.out.println("eventFrame="+eventFrame);
435 // why doesn't this spit out statements?
436 }
437 }
438
439 public SimpleInstance getTeamAnnotator() {
440 return teamAnnotator;
441 }
442 }
443
/*
 * Legacy code, kept for reference only: the lines below are commented out and
 * sit outside the ConsensusSet class. They found the team annotator and the
 * individual annotators in a single consensus filter and initialized the
 * variables teamAnnotator and annotators, respectively. If the passed in
 * filter did not have the appropriate annotators, then an exception was to be
 * thrown.
 */
450 // Set<SimpleInstance> filterAnnotators =
451 // FilterUtil.getAnnotators(consensusFilter);
452 // this.annotators = new HashSet<SimpleInstance>();
453 // for (SimpleInstance filterAnnotator : filterAnnotators)
454 // {
455 // if(AnnotatorUtil.isTeamAnnotator(filterAnnotator))
456 // {
457 // this.teamAnnotator = filterAnnotator;
458 // }
459 // else
460 // annotators.add(filterAnnotator);
461 // }