001 /* 002 * The contents of this file are subject to the Mozilla Public 003 * License Version 1.1 (the "License"); you may not use this file 004 * except in compliance with the License. You may obtain a copy of 005 * the License at http://www.mozilla.org/MPL/ 006 * 007 * Software distributed under the License is distributed on an "AS 008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 009 * implied. See the License for the specific language governing 010 * rights and limitations under the License. 011 * 012 * The Original Code is Knowtator. 013 * 014 * The Initial Developer of the Original Code is University of Colorado. 015 * Copyright (C) 2005 - 2008. All Rights Reserved. 016 * 017 * Knowtator was developed by the Center for Computational Pharmacology 018 * (http://compbio.uchcs.edu) at the University of Colorado Health 019 * Sciences Center School of Medicine with support from the National 020 * Library of Medicine. 021 * 022 * Current information about Knowtator can be obtained at 023 * http://knowtator.sourceforge.net/ 024 * 025 * Contributor(s): 026 * Philip V. Ogren <philip@ogren.info> (Original Author) 027 */ 028 029 package edu.uchsc.ccp.knowtator; 030 031 import java.util.ArrayList; 032 import java.util.Collection; 033 import java.util.Collections; 034 import java.util.HashMap; 035 import java.util.HashSet; 036 import java.util.List; 037 import java.util.Map; 038 import java.util.Set; 039 040 import edu.stanford.smi.protege.model.Cls; 041 import edu.stanford.smi.protege.model.Instance; 042 import edu.stanford.smi.protege.model.KnowledgeBase; 043 import edu.stanford.smi.protege.model.Project; 044 import edu.stanford.smi.protege.model.SimpleInstance; 045 import edu.stanford.smi.protege.util.CollectionUtilities; 046 import edu.uchsc.ccp.knowtator.event.SpanEditEvent; 047 import edu.uchsc.ccp.knowtator.event.SpanEditListener; 048 import edu.uchsc.ccp.knowtator.textsource.TextSource; 049 import edu.uchsc.ccp.knowtator.textsource.TextSourceAccessException; 050 import edu.uchsc.ccp.knowtator.util.ProtegeUtil; 051 052 /** 053 * Authors: Philip V. Ogren Created: September, 2004 Description: This class 054 * aids in creating, editing and retrieving annotation intances as well as the 055 * supporting annotation classes. The annotation model is defined in 056 * annotations.pprj. This project must be included when the annotator plug-in is 057 * used. Todo: AnnotationUtil should really not have an instance of knowtator as 058 * a member variable. I need to create the appropriate listeners, so that 059 * Knowtator can take care of itself as it see fits. Changes: 02/28/2005 added 060 * annotation filter functionality 05/02/2005 package changed to ...knowtator 061 * 8/11/2005 pvo added getReferencedAnnotations method 10/04/2005 Changed 062 * signature of main createAnnotations method so that a string for the spanned 063 * text can be passed in. This removes the necessity of gathering the spanned 064 * text from the actual text source which may be an unnecessary task if you are 065 * copying an annotation (e.g. as in MergeAnnotations). This change had a small 066 * ripple effect on other methods: setSpans, updateAnnotationText 10/04/2005 067 * Changed name of deleteAnnotationMention to deleteMention (I just didn't like 068 * the name!) 069 */ 070 071 public class AnnotationUtil implements SpanEditListener { 072 073 KnowtatorManager manager; 074 075 KnowtatorProjectUtil kpu; 076 077 KnowledgeBase kb; 078 079 Project project; 080 081 TextSourceUtil textSourceUtil; 082 083 MentionUtil mentionUtil; 084 085 Map<SimpleInstance, java.util.List<Span>> annotationSpansCache; 086 087 public AnnotationUtil(KnowtatorManager manager) { 088 this.manager = manager; 089 kpu = manager.getKnowtatorProjectUtil(); 090 kb = manager.getKnowledgeBase(); 091 this.project = kb.getProject(); 092 annotationSpansCache = new HashMap<SimpleInstance, java.util.List<Span>>(); 093 // EventHandler.getInstance().addSpanEditListener(this); //not needed 094 // because SpanUtil updates list retrieved from getSpans() 095 } 096 097 public void setTextSourceUtil(TextSourceUtil textSourceUtil) { 098 this.textSourceUtil = textSourceUtil; 099 } 100 101 public void setMentionUtil(MentionUtil mentionUtil) { 102 this.mentionUtil = mentionUtil; 103 } 104 105 /** 106 * This method deletes the mention of annotation. 107 * 108 * Although it is not possible with the current interface to associate a 109 * mention with more than one annotation, there is no other reason why this 110 * can't happen. If a mention is associated with more than one annotation, 111 * then the mention will not be deleted. 112 * 113 * @param annotation 114 * the mention of this annotation will be deleted via 115 * mentionUtil.deleteMention. 116 * 117 */ 118 119 public void deleteMention(SimpleInstance annotation) { 120 SimpleInstance mention = getMention(annotation); 121 122 if (mention != null) { 123 mentionUtil.deleteMention(mention); 124 } 125 126 } 127 128 public void spanEditted(SpanEditEvent see) { 129 annotationSpansCache.remove(see.getAnnotation()); 130 } 131 132 /** 133 * Returns a list of Span objects that correspond to the span values given 134 * to an annotation instance. The spans will be ordered as defined by the 135 * Comparable interface implementation in Span. If a span value from an 136 * annotation does not parse (i.e. has a string value that is not in the 137 * correct span format), then an InvalidSpanException will be thrown. 138 * 139 * @param annotation 140 * the spans for the annotation will be returned 141 * @return a list of spans corresponding to the annotation's 142 * kpu.getAnnotationSpanSlot() 143 */ 144 public java.util.List<Span> getSpans(SimpleInstance annotation) throws InvalidSpanException { 145 return getSpans(annotation, false); 146 } 147 148 /** 149 * Returns a list of Span objects that correspond to the span values given 150 * to an annotation instance. The spans will be ordered as defined by the 151 * Comparable interface implementation in Span. If a span value from an 152 * annotation does not parse (i.e. has a string value that is not in the 153 * correct span format), then an InvalidSpanException will be thrown. 154 * 155 * @param annotation 156 * the spans for the annotation will be returned 157 * @return a list of spans corresponding to the annotation's 158 * kpu.getAnnotationSpanSlot() 159 */ 160 public java.util.List<Span> getSpans(SimpleInstance annotation, boolean ignoreCache) throws InvalidSpanException { 161 if (annotation == null) 162 return Collections.emptyList(); 163 if (annotationSpansCache.containsKey(annotation) && !ignoreCache) { 164 return annotationSpansCache.get(annotation); 165 } 166 167 Collection<String> spanStrings = ProtegeUtil.castStrings(annotation.getOwnSlotValues(kpu.annotationSpanSlot)); 168 169 ArrayList<Span> spans = new ArrayList<Span>(); 170 for (String spanString : spanStrings) { 171 spans.add(Span.parseSpan(spanString)); 172 } 173 174 Collections.sort(spans); 175 annotationSpansCache.put(annotation, spans); 176 return spans; 177 } 178 179 /** 180 * Sets the spans slot for an annotation instance. If a 181 * 182 * @param annotation 183 * set the span slot for the passed in annotation 184 * @param spans 185 * a list of spans for the annotation. This will typically be a 186 * list of 1 span object, but may occasionally have a 2 or more 187 * spans. 188 * @param spannedText 189 * if null, then the text from the text source for the annotation 190 * will be used to determine the spannedText. If the spannedText 191 * is already known, then it may be useful to pass in the 192 * spannedText. Passing in an empty string will cause the 193 * spannedText to be an empty string. 194 */ 195 public void setSpans(SimpleInstance annotation, java.util.List<Span> spans, String spannedText) 196 throws TextSourceAccessException { 197 ArrayList<String> spanStrings = new ArrayList<String>(spans.size()); 198 for (Span span : spans) { 199 spanStrings.add(span.toString()); 200 } 201 annotation.setOwnSlotValues(kpu.annotationSpanSlot, spanStrings); 202 if (spannedText == null && spans.size() > 0) 203 updateSpannedText(annotation, spans); 204 else { 205 setText(annotation, spannedText); 206 } 207 annotationSpansCache.put(annotation, spans); 208 } 209 210 /** 211 * updates the spanned text of an annotation of a text source. This is does 212 * this by calling TextSource.getText(spans). If TextSource cannot be found 213 * using TextSourceUtil.getTextSource(TextSource), then a 214 * TextSourceAccessException is thrown. 215 * 216 * @param annotation 217 * the annotation whose spanned text is being updated 218 * @param spans 219 * the spans corresponding to offsets in the text source. 220 */ 221 private void updateSpannedText(SimpleInstance annotation, java.util.List<Span> spans) 222 throws TextSourceAccessException { 223 SimpleInstance textSource = getTextSource(annotation); 224 if (textSource != null) { 225 TextSource ts = textSourceUtil.getTextSource(textSource); 226 String spannedText = ts.getText(spans); 227 setText(annotation, spannedText); 228 } 229 } 230 231 /** 232 * Returns the "annotation" instances associated with a TextSource if the 233 * TextSource does not have a corresponding "text source" instance in the 234 * knowledgebase, then null is returned. If the "text source" instance does 235 * exist then all of the "annotation" instances are found and returned. If 236 * none are found, an empty array of instances is returned. 237 */ 238 239 public Collection<SimpleInstance> getAnnotations(TextSource textSource) { 240 SimpleInstance textSourceInstance = textSourceUtil.getTextSourceInstance(textSource, false); 241 if (textSourceInstance == null) 242 return null; 243 else 244 return getAnnotations(textSourceInstance); 245 } 246 247 public Collection<SimpleInstance> getAnnotations(String textSourceName) { 248 SimpleInstance textSourceInstance = (SimpleInstance) kb.getInstance(textSourceName); 249 if (textSourceInstance != null) { 250 return getAnnotations(textSourceInstance); 251 } else { 252 return null; 253 } 254 } 255 256 /** 257 * Does the work of finding all "annotation" instances that have the 258 * textSourceInstance as the value of "annotation_text_source" slot. 259 */ 260 public Collection<SimpleInstance> getAnnotations(SimpleInstance textSourceInstance) { 261 Collection<Instance> annotations = kb.getInstances(kpu.getAnnotationCls()); 262 Collection<SimpleInstance> returnValues = new ArrayList<SimpleInstance>(); 263 for (Instance ann : annotations) { 264 SimpleInstance annotation = (SimpleInstance) ann; 265 SimpleInstance ts = getTextSource(annotation); 266 if (ts != null && ts.equals(textSourceInstance)) { 267 returnValues.add(annotation); 268 } 269 } 270 return returnValues; 271 } 272 273 public SimpleInstance createAnnotation(Cls annotationCls, java.util.List<Span> spans, String spannedText, 274 String textSourceName) throws TextSourceAccessException { 275 return createAnnotation(annotationCls, spans, spannedText, textSourceName, null, null); 276 } 277 278 public SimpleInstance createAnnotation(Cls annotationCls, java.util.List<Span> spans, String spannedText, 279 String textSourceName, SimpleInstance annotator, SimpleInstance set) throws TextSourceAccessException { 280 SimpleInstance mention = mentionUtil.createMention(annotationCls); 281 SimpleInstance textSource = kb.getSimpleInstance(textSourceName); 282 if (textSource == null) 283 textSource = kb.createSimpleInstance(null, textSourceName, CollectionUtilities.createCollection(kpu.getTextSourceCls()), 284 true); 285 286 Collection<SimpleInstance> annotationSets = new ArrayList<SimpleInstance>(); 287 if (set != null) { 288 annotationSets.add(set); 289 } 290 return createAnnotation(mention, annotator, spans, spannedText, textSource, annotationSets); 291 } 292 293 /** 294 * If the textSource does not have a corresponding instance in the kb, then 295 * one will be created. 296 */ 297 298 public SimpleInstance createAnnotation(SimpleInstance mention, SimpleInstance annotator, 299 java.util.List<Span> spans, TextSource textSource, SimpleInstance annotationSet) 300 throws TextSourceAccessException { 301 SimpleInstance textSourceInstance = textSourceUtil.getTextSourceInstance(textSource, true); 302 303 return createAnnotation(mention, annotator, spans, textSourceInstance, annotationSet); 304 } 305 306 public SimpleInstance createAnnotation(SimpleInstance mention, SimpleInstance annotator, 307 java.util.List<Span> spans, SimpleInstance textSourceInstance, SimpleInstance annotationSet) 308 throws TextSourceAccessException { 309 Collection<SimpleInstance> annotationSets = new ArrayList<SimpleInstance>(); 310 if (annotationSet != null) { 311 annotationSets.add(annotationSet); 312 } 313 314 return createAnnotation(mention, annotator, spans, null, textSourceInstance, annotationSets); 315 } 316 317 public SimpleInstance createAnnotation(SimpleInstance mention, SimpleInstance annotator, 318 java.util.List<Span> spans, String spannedText, SimpleInstance textSourceInstance, 319 java.util.Collection<SimpleInstance> annotationSets) throws TextSourceAccessException { 320 SimpleInstance annotationInstance = kb.createSimpleInstance(null, null, CollectionUtilities 321 .createCollection(kpu.annotationCls), true); 322 if (mention != null) { 323 annotationInstance.setOwnSlotValue(kpu.annotatedMentionSlot, mention); 324 } 325 326 if (annotator != null) { 327 annotationInstance.setOwnSlotValue(kpu.annotationAnnotatorSlot, annotator); 328 } 329 330 if (annotationSets != null && annotationSets.size() > 0) { 331 annotationInstance.setOwnSlotValues(kpu.setSlot, annotationSets); 332 } 333 334 if (textSourceInstance != null) { 335 annotationInstance.setOwnSlotValue(kpu.annotationTextSourceSlot, textSourceInstance); 336 } 337 338 setSpans(annotationInstance, spans, spannedText); 339 340 return annotationInstance; 341 342 } 343 344 /** 345 * Creates an annotation. Overloaded to include the creation date in the 346 * annotation. Designed to be used for creating the annotation while 347 * importing an XML file. 348 * 349 * @param creationDate 350 * A String representation of the date (time stamp) the 351 * annotation was created. 352 * 353 * @throws TextSourceAccessException 354 */ 355 public SimpleInstance createAnnotation(SimpleInstance mention, SimpleInstance annotator, 356 java.util.List<Span> spans, String spannedText, SimpleInstance textSourceInstance, 357 java.util.Collection<SimpleInstance> annotationSets, String creationDate) throws TextSourceAccessException { 358 359 SimpleInstance annotationInstance = createAnnotation(mention, annotator, spans, spannedText, 360 textSourceInstance, annotationSets); 361 362 if (creationDate != null) { 363 setCreationDate(annotationInstance, creationDate); 364 } 365 366 return annotationInstance; 367 } 368 369 /** 370 * This method returns true if two annotations have exactly the same spans. 371 */ 372 public boolean compareSpans(SimpleInstance annotation1, SimpleInstance annotation2) { 373 try { 374 java.util.List<Span> spans1 = getSpans(annotation1); 375 java.util.List<Span> spans2 = getSpans(annotation2); 376 377 return Span.spansMatch(spans1, spans2); 378 } catch (InvalidSpanException ise) { 379 return false; 380 } 381 } 382 383 public boolean compareSpans(List<SimpleInstance> annotations) { 384 for (int i = 1; i < annotations.size(); i++) { 385 if (!compareSpans(annotations.get(0), annotations.get(i))) 386 return false; 387 } 388 return true; 389 } 390 391 public boolean isAnnotation(SimpleInstance annotation) { 392 if (annotation == null) 393 return false; 394 395 Cls type = annotation.getDirectType(); 396 if (type == null) 397 return false; 398 if (type.equals(kpu.getAnnotationCls())) { 399 return true; 400 } 401 return type.getSuperclasses().contains(kpu.getAnnotationCls()); 402 } 403 404 public boolean hasTeamAnnotator(SimpleInstance annotation) { 405 SimpleInstance annotator = getAnnotator(annotation); 406 if (annotator == null) 407 return false; 408 Cls annotatorType = annotator.getDirectType(); 409 if (annotatorType == null) 410 return false; 411 if (annotatorType.equals(kpu.getTeamAnnotatorCls())) 412 return true; 413 return annotatorType.getSuperclasses().contains(kpu.getTeamAnnotatorCls()); 414 415 } 416 417 public void setProjectAnnotator(SimpleInstance annotation) { 418 if (isAnnotation(annotation)) { 419 setAnnotator(annotation, manager.getSelectedAnnotator()); 420 } 421 } 422 423 public void setProjectAnnotationSet(SimpleInstance annotation) { 424 if (isAnnotation(annotation)) { 425 setSet(annotation, manager.getSelectedAnnotationSet()); 426 } 427 } 428 429 /** 430 * Returns all annotations that are related to the passed in annotation. If 431 * an annotation is the slot value of another annotation (or more correctly 432 * if the mention of an annotation is a slot mention's value of another 433 * annotation's mention). 434 * 435 * This method recursively gathers all annotations that are related (not 436 * just directly related). 437 */ 438 439 public Set<SimpleInstance> getRelatedAnnotations(SimpleInstance annotation) { 440 Set<SimpleInstance> referencedAnnotations = new HashSet<SimpleInstance>(); 441 _getRelatedAnnotations(annotation, referencedAnnotations); 442 return referencedAnnotations; 443 } 444 445 private void _getRelatedAnnotations(SimpleInstance annotation, Set<SimpleInstance> referencedAnnotations) { 446 SimpleInstance mentionInstance = getMention(annotation); 447 if (mentionInstance != null) { 448 List<SimpleInstance> referencedMentions = mentionUtil.getRelatedMentions(mentionInstance); 449 for (SimpleInstance referencedMention : referencedMentions) { 450 SimpleInstance ann = mentionUtil.getMentionAnnotation(referencedMention); 451 if (!referencedAnnotations.contains(ann)) { 452 referencedAnnotations.add(ann); 453 _getRelatedAnnotations(ann, referencedAnnotations); 454 } 455 } 456 } 457 } 458 459 public List<SimpleInstance> retrieveAllAnnotations() { 460 Collection<SimpleInstance> instances = ProtegeUtil.castSimpleInstances(kpu.getAnnotationCls().getInstances()); 461 List<SimpleInstance> annotations = new ArrayList<SimpleInstance>(instances); 462 return annotations; 463 } 464 465 public void setAnnotator(SimpleInstance annotation, SimpleInstance annotator) { 466 if (isAnnotation(annotation) && annotator != null) { 467 annotation.setOwnSlotValue(kpu.getAnnotationAnnotatorSlot(), annotator); 468 } 469 } 470 471 public void setSet(SimpleInstance annotation, SimpleInstance set) { 472 Collection<SimpleInstance> sets = new ArrayList<SimpleInstance>(); 473 sets.add(set); 474 setSets(annotation, sets); 475 } 476 477 public void setSets(SimpleInstance annotation, Collection<SimpleInstance> sets) { 478 if (sets == null) { 479 sets = Collections.emptyList(); 480 } 481 if (isAnnotation(annotation)) { 482 annotation.setOwnSlotValues(kpu.getSetSlot(), sets); 483 } 484 } 485 486 public Set<SimpleInstance> getSets(SimpleInstance annotation) { 487 if (isAnnotation(annotation)) { 488 Collection<SimpleInstance> sets = ProtegeUtil.castSimpleInstances(annotation.getOwnSlotValues(kpu 489 .getSetSlot())); 490 if (sets != null) { 491 return new HashSet<SimpleInstance>(sets); 492 } 493 } 494 return Collections.emptySet(); 495 } 496 497 public SimpleInstance getTextSource(SimpleInstance annotation) { 498 if (isAnnotation(annotation)) { 499 SimpleInstance textSource = (SimpleInstance) annotation.getOwnSlotValue(kpu.getAnnotationTextSourceSlot()); 500 return textSource; 501 } 502 return null; 503 } 504 505 public void setTextSource(SimpleInstance annotation, SimpleInstance textSource) { 506 if (isAnnotation(annotation)) { 507 annotation.setOwnSlotValue(kpu.getAnnotationTextSourceSlot(), textSource); 508 } 509 } 510 511 public String getText(SimpleInstance annotation) { 512 if (isAnnotation(annotation)) { 513 return (String) annotation.getOwnSlotValue(kpu.getAnnotationTextSlot()); 514 } 515 return null; 516 } 517 518 public void setText(SimpleInstance annotation, String spannedText) { 519 if (isAnnotation(annotation)) { 520 annotation.setOwnSlotValue(kpu.getAnnotationTextSlot(), spannedText); 521 } 522 } 523 524 public boolean spansOverlap(SimpleInstance annotation1, SimpleInstance annotation2) { 525 try { 526 List<Span> spans1 = getSpans(annotation1); 527 List<Span> spans2 = getSpans(annotation2); 528 return Span.intersects(spans1, spans2); 529 } catch (InvalidSpanException ise) { 530 return false; 531 } 532 } 533 534 /** 535 * @return the size of the span associated with the annotation. If the 536 * annotation has more than one span, then the sum of the size of 537 * the spans is returned. 538 * @throws InvalidSpanException 539 * if the annotation has a badly formed span, then an exception 540 * will be thrown. 541 */ 542 543 public int getSize(SimpleInstance annotation) throws InvalidSpanException { 544 List<Span> spans = getSpans(annotation); 545 int size = 0; 546 for (Span span : spans) { 547 size += span.getSize(); 548 } 549 return size; 550 } 551 552 /** 553 * This method returns the shortest annotation - that is the annotation 554 * whose span is the shortest. If an annotation has more than one span, then 555 * its size is the sum of the size of each of its spans. 556 * 557 * @param annotations 558 * @return will only return one annotation. In the case of a tie, will 559 * return the first annotation with the smallest size encountered 560 * during iteration. Returns null if annotation.size() == 0 or if 561 * each of the annotations has poorly formed spans (very unlikely). 562 */ 563 564 public SimpleInstance getShortestAnnotation(Collection<SimpleInstance> annotations) { 565 if (annotations.size() == 0) 566 return null; 567 568 SimpleInstance shortestAnnotation = null; 569 int shortestAnnotationLength = -1; 570 571 for (SimpleInstance annotation : annotations) { 572 try { 573 int annotationSize = getSize(annotation); 574 if (shortestAnnotationLength == -1 || annotationSize < shortestAnnotationLength) { 575 shortestAnnotation = annotation; 576 shortestAnnotationLength = annotationSize; 577 } 578 } catch (InvalidSpanException ise) { 579 continue; 580 } 581 } 582 return shortestAnnotation; 583 } 584 585 public SimpleInstance getMention(SimpleInstance annotation) { 586 if (isAnnotation(annotation)) { 587 SimpleInstance mention = (SimpleInstance) annotation.getOwnSlotValue(kpu.getAnnotatedMentionSlot()); 588 return mention; 589 } 590 return null; 591 } 592 593 public void setMention(SimpleInstance annotation, SimpleInstance mention) { 594 if (isAnnotation(annotation) && mentionUtil.isMention(mention)) { 595 annotation.setOwnSlotValue(kpu.getAnnotatedMentionSlot(), mention); 596 } 597 } 598 599 public SimpleInstance getAnnotator(SimpleInstance annotation) { 600 if (isAnnotation(annotation)) { 601 return (SimpleInstance) annotation.getOwnSlotValue(kpu.getAnnotationAnnotatorSlot()); 602 } 603 return null; 604 } 605 606 public String getComment(SimpleInstance annotation) { 607 if (isAnnotation(annotation)) { 608 return (String) annotation.getOwnSlotValue(kpu.getAnnotationCommentSlot()); 609 } 610 return null; 611 } 612 613 public void setComment(SimpleInstance annotation, String comment) { 614 if (isAnnotation(annotation)) { 615 annotation.setOwnSlotValue(kpu.getAnnotationCommentSlot(), comment); 616 } 617 } 618 619 public String getCreationDate(SimpleInstance annotation) { 620 if (isAnnotation(annotation)) { 621 return (String) annotation.getOwnSlotValue(kpu.getAnnotationCreationDateSlot()); 622 } 623 return null; 624 } 625 626 public void setCreationDate(SimpleInstance annotation, String creationDate) { 627 if (isAnnotation(annotation)) { 628 annotation.setOwnSlotValue(kpu.getAnnotationCreationDateSlot(), creationDate); 629 } 630 } 631 632 }