001 /* 002 * The contents of this file are subject to the Mozilla Public 003 * License Version 1.1 (the "License"); you may not use this file 004 * except in compliance with the License. You may obtain a copy of 005 * the License at http://www.mozilla.org/MPL/ 006 * 007 * Software distributed under the License is distributed on an "AS 008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 009 * implied. See the License for the specific language governing 010 * rights and limitations under the License. 011 * 012 * The Original Code is Knowtator. 013 * 014 * The Initial Developer of the Original Code is University of Colorado. 015 * Copyright (C) 2005 - 2008. All Rights Reserved. 016 * 017 * Knowtator was developed by the Center for Computational Pharmacology 018 * (http://compbio.uchcs.edu) at the University of Colorado Health 019 * Sciences Center School of Medicine with support from the National 020 * Library of Medicine. 021 * 022 * Current information about Knowtator can be obtained at 023 * http://knowtator.sourceforge.net/ 024 * 025 * Contributor(s): 026 * Philip V. Ogren <philip@ogren.info> (Original Author) 027 */ 028 package edu.uchsc.ccp.knowtator.util; 029 030 import java.util.Collection; 031 import java.util.HashMap; 032 import java.util.HashSet; 033 import java.util.Map; 034 import java.util.Set; 035 036 import org.apache.log4j.Logger; 037 038 import edu.stanford.smi.protege.event.FrameAdapter; 039 import edu.stanford.smi.protege.event.FrameEvent; 040 import edu.stanford.smi.protege.model.Cls; 041 import edu.stanford.smi.protege.model.Frame; 042 import edu.stanford.smi.protege.model.KnowledgeBase; 043 import edu.stanford.smi.protege.model.SimpleInstance; 044 import edu.stanford.smi.protege.model.Slot; 045 import edu.uchsc.ccp.knowtator.AnnotationUtil; 046 import edu.uchsc.ccp.knowtator.FilterUtil; 047 import edu.uchsc.ccp.knowtator.KnowtatorManager; 048 import edu.uchsc.ccp.knowtator.KnowtatorProjectUtil; 049 import edu.uchsc.ccp.knowtator.MentionUtil; 050 051 /** 052 * 053 * @author Philip V. Ogren 054 * 055 * This class does not represent a consensus set of annotations. It 056 * serves a class that helps manage a set of consensus annotations. When 057 * the menu item 'Create Consensus Set' is run an instance of 'knowtator 058 * set' will be created. Annotations in this set define a consensus set. 059 * When redundant annotations exist in the set we want to consolidate 060 * them. This is done by identifying that there is an annotation from 061 * each of the individual annotators that is exactly the same. We can 062 * then change the annotator to one of the annotations to the team 063 * annotator and discard the others. This class facilitates the 064 * consolidation of such redundant annotations - typically one text 065 * source at a time. 066 */ 067 068 public class ConsensusSet { 069 Logger logger = Logger.getLogger(ConsensusSet.class); 070 071 // A collection of all of the annotations in the ConsensusSet 072 Set<SimpleInstance> annotations; 073 074 // A collection of all of the mentions in the ConsensusSet 075 Set<SimpleInstance> mentions; 076 077 // contains each of the individual annotators in the ConsensusSet. 078 // These are obtained from the filter passed into the constructor. 079 Set<SimpleInstance> annotators; 080 081 // The team annotator that consists of each of the annotators found 082 // in the annotators member variable (previous). 083 SimpleInstance teamAnnotator; 084 085 // key is an annotator, value is a set of annotations by the annotator. 086 Map<SimpleInstance, Set<SimpleInstance>> annotatorAnnotations; 087 088 // key is a class or instance mention that is a value of a slot mention. The 089 // value is a complex slot mention. 090 Map<SimpleInstance, Set<SimpleInstance>> slotValueToComplexMention; 091 092 KnowtatorManager manager; 093 094 AnnotationUtil annotationUtil; 095 096 MentionUtil mentionUtil; 097 098 KnowtatorProjectUtil kpu; 099 100 KnowledgeBase kb; 101 102 ConsensusFrameAdapter frameListener; 103 104 boolean consolidatingAnnotations = false; 105 106 /** 107 * @param annotations 108 * some subset of the annotations that are found in a consensus 109 * set. Typically, all of the annotations corresponding to a 110 * single text source that pass through the consensusFilter 111 * @throws ConsensusException 112 */ 113 /** 114 * 115 */ 116 public ConsensusSet(Set<SimpleInstance> annotations, SimpleInstance consensusSet, KnowtatorManager manager) 117 throws ConsensusException { 118 logger.debug("creating consensus set"); 119 this.annotations = annotations; 120 this.manager = manager; 121 this.annotationUtil = manager.getAnnotationUtil(); 122 this.mentionUtil = manager.getMentionUtil(); 123 this.kpu = manager.getKnowtatorProjectUtil(); 124 this.kb = manager.getKnowledgeBase(); 125 126 this.teamAnnotator = (SimpleInstance) consensusSet.getOwnSlotValue(kpu.getConsensusSetTeamAnnotatorSlot()); 127 SimpleInstance individualFilter = (SimpleInstance) consensusSet.getOwnSlotValue(kpu 128 .getConsensusSetIndividualFilterSlot()); 129 this.annotators = new HashSet<SimpleInstance>(FilterUtil.getAnnotators(individualFilter)); 130 131 /* 132 * Throw some exceptions if the annotators provided by the filter do not 133 * makes sense. 134 */ 135 if (teamAnnotator == null) 136 throw new ConsensusException("There is no team annotator for this consensus set." 137 + "\nPlease make sure that the provided filter has" + "\na single team annotator."); 138 if (annotators.size() < 2) 139 throw new ConsensusException("There are less than two annotators for this " 140 + "\nconsensus set. Please make sure that the " + "\nprovided filter has two or more individual " 141 + "\nannotators."); 142 143 Collection<SimpleInstance> teamAnnotators = (Collection<SimpleInstance>) teamAnnotator.getOwnSlotValues(kpu 144 .getAnnotatorTeamMembersSlot()); 145 if (teamAnnotators.size() != annotators.size()) 146 throw new ConsensusException("The team annotator provided by the filter is " 147 + "\nnot consistent with the individual annotators " + "\nalso specified in the filter." + "\n " 148 + teamAnnotators.size() + " annotators specified " + "\nin team annotator and " + annotators.size() 149 + " " + "\nspecified by the filter passed into the consensus " + "\nset."); 150 for (SimpleInstance tmAnnotator : teamAnnotators) { 151 if (!annotators.contains(tmAnnotator)) 152 throw new ConsensusException("The team annotator provided by the filter is " 153 + "\nnot consistent with the individual annotators " + "\nalso specified in the filter" 154 + "\n TeamAnnotator=" + teamAnnotator.getBrowserText() + "\nand missing annotator=" 155 + tmAnnotator.getBrowserText()); 156 } 157 158 // collect all of the mentions associated with the annotations and 159 // populate annotatorAnnotations 160 mentions = new HashSet<SimpleInstance>(); 161 annotatorAnnotations = new HashMap<SimpleInstance, Set<SimpleInstance>>(); 162 frameListener = new ConsensusFrameAdapter(); 163 for (SimpleInstance annotation : annotations) { 164 annotation.addFrameListener(frameListener); 165 SimpleInstance mention = annotationUtil.getMention(annotation); 166 mentions.addAll(mentionUtil.getAllConnectedMentions(mention)); 167 168 SimpleInstance annotator = annotationUtil.getAnnotator(annotation); 169 if (!annotators.contains(annotator) && !annotator.equals(teamAnnotator)) 170 throw new ConsensusException( 171 "An annotation in this set was created by an annotator other than the annotators specified by the filter or the team annotators" 172 + "\n The offending annotations is \"" 173 + annotation.getBrowserText() 174 + "\" with annotator = " + annotator.getBrowserText()); 175 if (!annotatorAnnotations.containsKey(annotator)) { 176 annotatorAnnotations.put(annotator, new HashSet<SimpleInstance>()); 177 } 178 annotatorAnnotations.get(annotator).add(annotation); 179 } 180 181 slotValueToComplexMention = new HashMap<SimpleInstance, Set<SimpleInstance>>(); 182 for (SimpleInstance mention : mentions) { 183 mention.addFrameListener(frameListener); 184 185 if (mentionUtil.isComplexSlotMention(mention)) { 186 java.util.List<Object> slotValues = mentionUtil.getSlotMentionValues(mention); 187 if (slotValues.size() > 0) { 188 for (Object slotValue : slotValues) { 189 SimpleInstance slotValueInstance = (SimpleInstance) slotValue; 190 if (!slotValueToComplexMention.containsKey(slotValueInstance)) 191 slotValueToComplexMention.put(slotValueInstance, new HashSet<SimpleInstance>()); 192 slotValueToComplexMention.get(slotValueInstance).add(mention); 193 } 194 } 195 } 196 } 197 } 198 199 /** 200 * This method finds all complex slot mentions that have originalMention as 201 * one of its values and 'replaces' it with newMention. The originalMention 202 * will always be removed from the set of slot values of the complex slot 203 * mention. However, newMention will only be added to the set of slot values 204 * only if it is not already there. 205 * 206 * @param originalMention 207 * @param newMention 208 */ 209 210 private void replaceSlotValue(SimpleInstance originalMention, SimpleInstance newMention) { 211 if (slotValueToComplexMention.containsKey(originalMention)) { 212 Set<SimpleInstance> complexMentions = slotValueToComplexMention.get(originalMention); 213 for (SimpleInstance complexMention : complexMentions) { 214 java.util.List<Object> complexMentionValues = mentionUtil.getSlotMentionValues(complexMention); 215 if (!complexMentionValues.contains(newMention)) 216 mentionUtil.addValueToSlotMention(complexMention, newMention); 217 mentionUtil.removeValueFromSlotMention(complexMention, originalMention); 218 } 219 } 220 } 221 222 /** 223 * This method consolidates two anntotations into one. It does this by 224 * removing one annotation from the consensus set (the redundantAnnotation) 225 * and making the other as annotated by the 'Team Annotator'. One of the 226 * important things that is done before the redundant annotation is deleted 227 * is to have the complex slot mentions whose value is the redundant 228 * annotation change their value to the consensusAnnotation. This is done 229 * with a call to replaceSlotValue. 230 * 231 * @param consensusAnnotation 232 * the annotation that will stick around 233 * @param redundantAnnotation 234 * the annotation that is going away 235 * @see #replaceSlotValue(SimpleInstance, SimpleInstance) 236 */ 237 public void consolidateAnnotations(SimpleInstance consensusAnnotation, SimpleInstance redundantAnnotation) { 238 // set annotator of consensus annotation to the team annotator. 239 consensusAnnotation.setDirectOwnSlotValue(kpu.getAnnotationAnnotatorSlot(), teamAnnotator); 240 241 // remove redundantAnnotation from ConsensusSet member variables 242 annotations.remove(redundantAnnotation); 243 SimpleInstance annotator = annotationUtil.getAnnotator(redundantAnnotation); 244 if (annotator != null) 245 annotatorAnnotations.get(annotator).remove(redundantAnnotation); 246 247 // replace slot mentions/values that correspond to the redundant 248 // annotation with the slot mention/value of the consensus annotation 249 SimpleInstance consensusMention = annotationUtil.getMention(consensusAnnotation); 250 SimpleInstance redundantMention = annotationUtil.getMention(redundantAnnotation); 251 replaceSlotValue(redundantMention, consensusMention); 252 // remove redundantMention from ConsensusSet member variables 253 mentions.remove(redundantMention); 254 slotValueToComplexMention.remove(redundantMention); 255 256 redundantMention.removeFrameListener(frameListener); 257 redundantAnnotation.removeFrameListener(frameListener); 258 manager.deleteAnnotation(redundantAnnotation); 259 } 260 261 /** 262 * This method finds annotations that are 'redundant' in the consensus set. 263 * An annotation is considered redundant if it is created by an individual 264 * annotator and it is the same as an annotation from each of the other 265 * annotators. Two annotations are the same if they have the same same span 266 * and the corresponding mentions of the annotations are identical. 267 * 268 * @param annotation 269 * we are looking for annotations that are exactly like this one. 270 * @return the annotations that are the same as the passed in annotation. 271 * Does not contain the passed in annotation. Will not return null; 272 */ 273 private Set<SimpleInstance> getRedundantAnnotations(SimpleInstance annotation) { 274 SimpleInstance mention = annotationUtil.getMention(annotation); 275 SimpleInstance annotator = annotationUtil.getAnnotator(annotation); 276 Set<SimpleInstance> matchedAnnotations = new HashSet<SimpleInstance>(); 277 278 for (SimpleInstance compareAnnotator : annotators) { 279 if (!annotator.equals(compareAnnotator) && !annotator.equals(teamAnnotator)) { 280 boolean annotatorMatched = false; 281 Set<SimpleInstance> candidateAnnotations = annotatorAnnotations.get(compareAnnotator); 282 if (candidateAnnotations == null) { 283 matchedAnnotations.clear(); 284 return matchedAnnotations; 285 } 286 for (SimpleInstance candidateAnnotation : candidateAnnotations) { 287 if (candidateAnnotation.isDeleted() || candidateAnnotation.isBeingDeleted()) 288 continue; 289 if (strictMatch(annotation, candidateAnnotation, annotationUtil, mentionUtil)) { 290 SimpleInstance matchMention = annotationUtil.getMention(candidateAnnotation); 291 if (mentionUtil.equals(mention, matchMention, true)) { 292 matchedAnnotations.add(candidateAnnotation); 293 annotatorMatched = true; 294 // add break here if you want to make sure that 295 // annotation matches only one annotation from each 296 // annotator. 297 // It may be that the individual has created 298 // redundant annotations. 299 } 300 } 301 } 302 if (!annotatorMatched) { 303 matchedAnnotations.clear(); 304 return matchedAnnotations; 305 } 306 } 307 } 308 return matchedAnnotations; 309 } 310 311 private boolean strictMatch(SimpleInstance annotation1, SimpleInstance annotation2, AnnotationUtil annotationUtil, 312 MentionUtil mentionUtil) { 313 SimpleInstance mention1 = annotationUtil.getMention(annotation1); 314 Cls mentionType1 = mentionUtil.getMentionCls(mention1); 315 SimpleInstance mention2 = annotationUtil.getMention(annotation2); 316 Cls mentionType2 = mentionUtil.getMentionCls(mention2); 317 boolean typesMatch = false; 318 if (mentionType1 == null && mentionType2 == null) 319 typesMatch = true; 320 else if (mentionType1 != null && mentionType1.equals(mentionType2)) 321 typesMatch = true; 322 323 if (typesMatch && annotationUtil.compareSpans(annotation1, annotation2)) { 324 return true; 325 } 326 return false; 327 } 328 329 public void consolidateAnnotations() { 330 try { 331 consolidatingAnnotations = true; 332 333 Set<SimpleInstance> excludeAnnotations = new HashSet<SimpleInstance>(); 334 // key is an annotation that we will keep, the values are redundant 335 // copies 336 Map<SimpleInstance, Set<SimpleInstance>> consolidatedAnnotations = new HashMap<SimpleInstance, Set<SimpleInstance>>(); 337 // key is a mention that can be removed/replaced with the value 338 Map<SimpleInstance, SimpleInstance> consolidatedMentions = new HashMap<SimpleInstance, SimpleInstance>(); 339 340 // loop through all the annotations created for the consensus set 341 // and look for identical/redundant annotations to remove. 342 // this loop will populate consolidatedAnnotations and 343 // consolidatedMentions 344 for (SimpleInstance annotation : annotations) { 345 if (annotation.isDeleted() || annotation.isBeingDeleted()) 346 continue; 347 SimpleInstance annotator = annotationUtil.getAnnotator(annotation); 348 if (annotator.equals(teamAnnotator)) 349 continue; 350 if (!excludeAnnotations.contains(annotation)) { 351 Set<SimpleInstance> redundantAnnotations = getRedundantAnnotations(annotation); 352 if (redundantAnnotations == null) 353 continue; 354 if (redundantAnnotations.size() > 0) { 355 consolidatedAnnotations.put(annotation, redundantAnnotations); 356 excludeAnnotations.addAll(redundantAnnotations); 357 SimpleInstance mention = annotationUtil.getMention(annotation); 358 for (SimpleInstance redundantAnnotation : redundantAnnotations) { 359 SimpleInstance redundantMention = annotationUtil.getMention(redundantAnnotation); 360 consolidatedMentions.put(redundantMention, mention); 361 } 362 } 363 } 364 } 365 366 for (SimpleInstance redundantMention : consolidatedMentions.keySet()) { 367 replaceSlotValue(redundantMention, consolidatedMentions.get(redundantMention)); 368 } 369 370 for (SimpleInstance consensusAnnotation : consolidatedAnnotations.keySet()) { 371 Set<SimpleInstance> redundantAnnotations = consolidatedAnnotations.get(consensusAnnotation); 372 for (SimpleInstance redundantAnnotation : redundantAnnotations) { 373 consolidateAnnotations(consensusAnnotation, redundantAnnotation); 374 } 375 } 376 consolidatingAnnotations = false; 377 } catch (NullPointerException npe) { 378 npe.printStackTrace(); 379 } 380 manager.updateCurrentAnnotations(); 381 } 382 383 protected void finalize() { 384 for (SimpleInstance mention : mentions) { 385 mention.removeFrameListener(frameListener); 386 } 387 } 388 389 public void destroy() { 390 for (SimpleInstance mention : mentions) { 391 mention.removeFrameListener(frameListener); 392 } 393 for (SimpleInstance annotation : annotations) { 394 annotation.removeFrameListener(frameListener); 395 } 396 annotations.clear(); 397 mentions.clear(); 398 annotators.clear(); 399 annotatorAnnotations.clear(); 400 slotValueToComplexMention.clear(); 401 } 402 403 public class ConsensusFrameAdapter extends FrameAdapter { 404 public void ownSlotValueChanged(FrameEvent event) { 405 406 if (!consolidatingAnnotations) { 407 Frame eventFrame = event.getFrame(); 408 409 // TODO looks like I need to also check for mentions here. 410 if (eventFrame instanceof SimpleInstance) { 411 SimpleInstance eventInstance = (SimpleInstance) eventFrame; 412 if (annotationUtil.isAnnotation(eventInstance)) { 413 SimpleInstance annotator = annotationUtil.getAnnotator(eventInstance); 414 if (annotator != null && annotator.equals(teamAnnotator)) 415 return; 416 } 417 } 418 Slot eventSlot = event.getSlot(); 419 if (eventSlot == null) 420 return; 421 if (eventSlot.equals(kpu.getMentionSlotSlot()) || eventSlot.equals(kpu.getMentionSlotValueSlot()) 422 || eventSlot.equals(kpu.getMentionClassSlot()) 423 || eventSlot.equals(kpu.getMentionInstanceSlot()) 424 || eventSlot.equals(kpu.getAnnotatedMentionSlot()) 425 || eventSlot.equals(kpu.getAnnotationSpanSlot())) 426 consolidateAnnotations(); 427 return; 428 } 429 } 430 431 public void deleted(FrameEvent event) { 432 // System.out.println("deleted"); 433 // Frame eventFrame = event.getFrame(); 434 // System.out.println("eventFrame="+eventFrame); 435 // why doesn't this spit out statements? 436 } 437 } 438 439 public SimpleInstance getTeamAnnotator() { 440 return teamAnnotator; 441 } 442 } 443 444 /* 445 * Here we find the team annotator and the individual annotators and initialize 446 * the variables teamAnnotator and annotators, respectively. If the passed in 447 * filter does not have the appropriate annotators, then an exception will be 448 * thrown. 449 */ 450 // Set<SimpleInstance> filterAnnotators = 451 // FilterUtil.getAnnotators(consensusFilter); 452 // this.annotators = new HashSet<SimpleInstance>(); 453 // for (SimpleInstance filterAnnotator : filterAnnotators) 454 // { 455 // if(AnnotatorUtil.isTeamAnnotator(filterAnnotator)) 456 // { 457 // this.teamAnnotator = filterAnnotator; 458 // } 459 // else 460 // annotators.add(filterAnnotator); 461 // }