001 /*
002 * The contents of this file are subject to the Mozilla Public
003 * License Version 1.1 (the "License"); you may not use this file
004 * except in compliance with the License. You may obtain a copy of
005 * the License at http://www.mozilla.org/MPL/
006 *
007 * Software distributed under the License is distributed on an "AS
008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009 * implied. See the License for the specific language governing
010 * rights and limitations under the License.
011 *
012 * The Original Code is Knowtator.
013 *
014 * The Initial Developer of the Original Code is University of Colorado.
015 * Copyright (C) 2005 - 2008. All Rights Reserved.
016 *
017 * Knowtator was developed by the Center for Computational Pharmacology
018 * (http://compbio.uchcs.edu) at the University of Colorado Health
019 * Sciences Center School of Medicine with support from the National
020 * Library of Medicine.
021 *
022 * Current information about Knowtator can be obtained at
023 * http://knowtator.sourceforge.net/
024 *
025 * Contributor(s):
026 * Philip V. Ogren <philip@ogren.info> (Original Author)
027 */
028
029 package edu.uchsc.ccp.knowtator;
030
031 import java.util.ArrayList;
032 import java.util.Collection;
033 import java.util.Collections;
034 import java.util.HashMap;
035 import java.util.HashSet;
036 import java.util.List;
037 import java.util.Map;
038 import java.util.Set;
039
040 import edu.stanford.smi.protege.model.Cls;
041 import edu.stanford.smi.protege.model.Instance;
042 import edu.stanford.smi.protege.model.KnowledgeBase;
043 import edu.stanford.smi.protege.model.Project;
044 import edu.stanford.smi.protege.model.SimpleInstance;
045 import edu.stanford.smi.protege.util.CollectionUtilities;
046 import edu.uchsc.ccp.knowtator.event.SpanEditEvent;
047 import edu.uchsc.ccp.knowtator.event.SpanEditListener;
048 import edu.uchsc.ccp.knowtator.textsource.TextSource;
049 import edu.uchsc.ccp.knowtator.textsource.TextSourceAccessException;
050 import edu.uchsc.ccp.knowtator.util.ProtegeUtil;
051
052 /**
053 * Authors: Philip V. Ogren Created: September, 2004 Description: This class
054 * aids in creating, editing and retrieving annotation intances as well as the
055 * supporting annotation classes. The annotation model is defined in
056 * annotations.pprj. This project must be included when the annotator plug-in is
057 * used. Todo: AnnotationUtil should really not have an instance of knowtator as
058 * a member variable. I need to create the appropriate listeners, so that
059 * Knowtator can take care of itself as it see fits. Changes: 02/28/2005 added
060 * annotation filter functionality 05/02/2005 package changed to ...knowtator
061 * 8/11/2005 pvo added getReferencedAnnotations method 10/04/2005 Changed
062 * signature of main createAnnotations method so that a string for the spanned
063 * text can be passed in. This removes the necessity of gathering the spanned
064 * text from the actual text source which may be an unnecessary task if you are
065 * copying an annotation (e.g. as in MergeAnnotations). This change had a small
066 * ripple effect on other methods: setSpans, updateAnnotationText 10/04/2005
067 * Changed name of deleteAnnotationMention to deleteMention (I just didn't like
068 * the name!)
069 */
070
071 public class AnnotationUtil implements SpanEditListener {
072
073 KnowtatorManager manager;
074
075 KnowtatorProjectUtil kpu;
076
077 KnowledgeBase kb;
078
079 Project project;
080
081 TextSourceUtil textSourceUtil;
082
083 MentionUtil mentionUtil;
084
085 Map<SimpleInstance, java.util.List<Span>> annotationSpansCache;
086
087 public AnnotationUtil(KnowtatorManager manager) {
088 this.manager = manager;
089 kpu = manager.getKnowtatorProjectUtil();
090 kb = manager.getKnowledgeBase();
091 this.project = kb.getProject();
092 annotationSpansCache = new HashMap<SimpleInstance, java.util.List<Span>>();
093 // EventHandler.getInstance().addSpanEditListener(this); //not needed
094 // because SpanUtil updates list retrieved from getSpans()
095 }
096
097 public void setTextSourceUtil(TextSourceUtil textSourceUtil) {
098 this.textSourceUtil = textSourceUtil;
099 }
100
101 public void setMentionUtil(MentionUtil mentionUtil) {
102 this.mentionUtil = mentionUtil;
103 }
104
105 /**
106 * This method deletes the mention of annotation.
107 *
108 * Although it is not possible with the current interface to associate a
109 * mention with more than one annotation, there is no other reason why this
110 * can't happen. If a mention is associated with more than one annotation,
111 * then the mention will not be deleted.
112 *
113 * @param annotation
114 * the mention of this annotation will be deleted via
115 * mentionUtil.deleteMention.
116 *
117 */
118
119 public void deleteMention(SimpleInstance annotation) {
120 SimpleInstance mention = getMention(annotation);
121
122 if (mention != null) {
123 mentionUtil.deleteMention(mention);
124 }
125
126 }
127
128 public void spanEditted(SpanEditEvent see) {
129 annotationSpansCache.remove(see.getAnnotation());
130 }
131
132 /**
133 * Returns a list of Span objects that correspond to the span values given
134 * to an annotation instance. The spans will be ordered as defined by the
135 * Comparable interface implementation in Span. If a span value from an
136 * annotation does not parse (i.e. has a string value that is not in the
137 * correct span format), then an InvalidSpanException will be thrown.
138 *
139 * @param annotation
140 * the spans for the annotation will be returned
141 * @return a list of spans corresponding to the annotation's
142 * kpu.getAnnotationSpanSlot()
143 */
144 public java.util.List<Span> getSpans(SimpleInstance annotation) throws InvalidSpanException {
145 return getSpans(annotation, false);
146 }
147
148 /**
149 * Returns a list of Span objects that correspond to the span values given
150 * to an annotation instance. The spans will be ordered as defined by the
151 * Comparable interface implementation in Span. If a span value from an
152 * annotation does not parse (i.e. has a string value that is not in the
153 * correct span format), then an InvalidSpanException will be thrown.
154 *
155 * @param annotation
156 * the spans for the annotation will be returned
157 * @return a list of spans corresponding to the annotation's
158 * kpu.getAnnotationSpanSlot()
159 */
160 public java.util.List<Span> getSpans(SimpleInstance annotation, boolean ignoreCache) throws InvalidSpanException {
161 if (annotation == null)
162 return Collections.emptyList();
163 if (annotationSpansCache.containsKey(annotation) && !ignoreCache) {
164 return annotationSpansCache.get(annotation);
165 }
166
167 Collection<String> spanStrings = ProtegeUtil.castStrings(annotation.getOwnSlotValues(kpu.annotationSpanSlot));
168
169 ArrayList<Span> spans = new ArrayList<Span>();
170 for (String spanString : spanStrings) {
171 spans.add(Span.parseSpan(spanString));
172 }
173
174 Collections.sort(spans);
175 annotationSpansCache.put(annotation, spans);
176 return spans;
177 }
178
179 /**
180 * Sets the spans slot for an annotation instance. If a
181 *
182 * @param annotation
183 * set the span slot for the passed in annotation
184 * @param spans
185 * a list of spans for the annotation. This will typically be a
186 * list of 1 span object, but may occasionally have a 2 or more
187 * spans.
188 * @param spannedText
189 * if null, then the text from the text source for the annotation
190 * will be used to determine the spannedText. If the spannedText
191 * is already known, then it may be useful to pass in the
192 * spannedText. Passing in an empty string will cause the
193 * spannedText to be an empty string.
194 */
195 public void setSpans(SimpleInstance annotation, java.util.List<Span> spans, String spannedText)
196 throws TextSourceAccessException {
197 ArrayList<String> spanStrings = new ArrayList<String>(spans.size());
198 for (Span span : spans) {
199 spanStrings.add(span.toString());
200 }
201 annotation.setOwnSlotValues(kpu.annotationSpanSlot, spanStrings);
202 if (spannedText == null && spans.size() > 0)
203 updateSpannedText(annotation, spans);
204 else {
205 setText(annotation, spannedText);
206 }
207 annotationSpansCache.put(annotation, spans);
208 }
209
210 /**
211 * updates the spanned text of an annotation of a text source. This is does
212 * this by calling TextSource.getText(spans). If TextSource cannot be found
213 * using TextSourceUtil.getTextSource(TextSource), then a
214 * TextSourceAccessException is thrown.
215 *
216 * @param annotation
217 * the annotation whose spanned text is being updated
218 * @param spans
219 * the spans corresponding to offsets in the text source.
220 */
221 private void updateSpannedText(SimpleInstance annotation, java.util.List<Span> spans)
222 throws TextSourceAccessException {
223 SimpleInstance textSource = getTextSource(annotation);
224 if (textSource != null) {
225 TextSource ts = textSourceUtil.getTextSource(textSource);
226 String spannedText = ts.getText(spans);
227 setText(annotation, spannedText);
228 }
229 }
230
231 /**
232 * Returns the "annotation" instances associated with a TextSource if the
233 * TextSource does not have a corresponding "text source" instance in the
234 * knowledgebase, then null is returned. If the "text source" instance does
235 * exist then all of the "annotation" instances are found and returned. If
236 * none are found, an empty array of instances is returned.
237 */
238
239 public Collection<SimpleInstance> getAnnotations(TextSource textSource) {
240 SimpleInstance textSourceInstance = textSourceUtil.getTextSourceInstance(textSource, false);
241 if (textSourceInstance == null)
242 return null;
243 else
244 return getAnnotations(textSourceInstance);
245 }
246
247 public Collection<SimpleInstance> getAnnotations(String textSourceName) {
248 SimpleInstance textSourceInstance = (SimpleInstance) kb.getInstance(textSourceName);
249 if (textSourceInstance != null) {
250 return getAnnotations(textSourceInstance);
251 } else {
252 return null;
253 }
254 }
255
256 /**
257 * Does the work of finding all "annotation" instances that have the
258 * textSourceInstance as the value of "annotation_text_source" slot.
259 */
260 public Collection<SimpleInstance> getAnnotations(SimpleInstance textSourceInstance) {
261 Collection<Instance> annotations = kb.getInstances(kpu.getAnnotationCls());
262 Collection<SimpleInstance> returnValues = new ArrayList<SimpleInstance>();
263 for (Instance ann : annotations) {
264 SimpleInstance annotation = (SimpleInstance) ann;
265 SimpleInstance ts = getTextSource(annotation);
266 if (ts != null && ts.equals(textSourceInstance)) {
267 returnValues.add(annotation);
268 }
269 }
270 return returnValues;
271 }
272
273 public SimpleInstance createAnnotation(Cls annotationCls, java.util.List<Span> spans, String spannedText,
274 String textSourceName) throws TextSourceAccessException {
275 return createAnnotation(annotationCls, spans, spannedText, textSourceName, null, null);
276 }
277
278 public SimpleInstance createAnnotation(Cls annotationCls, java.util.List<Span> spans, String spannedText,
279 String textSourceName, SimpleInstance annotator, SimpleInstance set) throws TextSourceAccessException {
280 SimpleInstance mention = mentionUtil.createMention(annotationCls);
281 SimpleInstance textSource = kb.getSimpleInstance(textSourceName);
282 if (textSource == null)
283 textSource = kb.createSimpleInstance(null, textSourceName, CollectionUtilities.createCollection(kpu.getTextSourceCls()),
284 true);
285
286 Collection<SimpleInstance> annotationSets = new ArrayList<SimpleInstance>();
287 if (set != null) {
288 annotationSets.add(set);
289 }
290 return createAnnotation(mention, annotator, spans, spannedText, textSource, annotationSets);
291 }
292
293 /**
294 * If the textSource does not have a corresponding instance in the kb, then
295 * one will be created.
296 */
297
298 public SimpleInstance createAnnotation(SimpleInstance mention, SimpleInstance annotator,
299 java.util.List<Span> spans, TextSource textSource, SimpleInstance annotationSet)
300 throws TextSourceAccessException {
301 SimpleInstance textSourceInstance = textSourceUtil.getTextSourceInstance(textSource, true);
302
303 return createAnnotation(mention, annotator, spans, textSourceInstance, annotationSet);
304 }
305
306 public SimpleInstance createAnnotation(SimpleInstance mention, SimpleInstance annotator,
307 java.util.List<Span> spans, SimpleInstance textSourceInstance, SimpleInstance annotationSet)
308 throws TextSourceAccessException {
309 Collection<SimpleInstance> annotationSets = new ArrayList<SimpleInstance>();
310 if (annotationSet != null) {
311 annotationSets.add(annotationSet);
312 }
313
314 return createAnnotation(mention, annotator, spans, null, textSourceInstance, annotationSets);
315 }
316
317 public SimpleInstance createAnnotation(SimpleInstance mention, SimpleInstance annotator,
318 java.util.List<Span> spans, String spannedText, SimpleInstance textSourceInstance,
319 java.util.Collection<SimpleInstance> annotationSets) throws TextSourceAccessException {
320 SimpleInstance annotationInstance = kb.createSimpleInstance(null, null, CollectionUtilities
321 .createCollection(kpu.annotationCls), true);
322 if (mention != null) {
323 annotationInstance.setOwnSlotValue(kpu.annotatedMentionSlot, mention);
324 }
325
326 if (annotator != null) {
327 annotationInstance.setOwnSlotValue(kpu.annotationAnnotatorSlot, annotator);
328 }
329
330 if (annotationSets != null && annotationSets.size() > 0) {
331 annotationInstance.setOwnSlotValues(kpu.setSlot, annotationSets);
332 }
333
334 if (textSourceInstance != null) {
335 annotationInstance.setOwnSlotValue(kpu.annotationTextSourceSlot, textSourceInstance);
336 }
337
338 setSpans(annotationInstance, spans, spannedText);
339
340 return annotationInstance;
341
342 }
343
344 /**
345 * Creates an annotation. Overloaded to include the creation date in the
346 * annotation. Designed to be used for creating the annotation while
347 * importing an XML file.
348 *
349 * @param creationDate
350 * A String representation of the date (time stamp) the
351 * annotation was created.
352 *
353 * @throws TextSourceAccessException
354 */
355 public SimpleInstance createAnnotation(SimpleInstance mention, SimpleInstance annotator,
356 java.util.List<Span> spans, String spannedText, SimpleInstance textSourceInstance,
357 java.util.Collection<SimpleInstance> annotationSets, String creationDate) throws TextSourceAccessException {
358
359 SimpleInstance annotationInstance = createAnnotation(mention, annotator, spans, spannedText,
360 textSourceInstance, annotationSets);
361
362 if (creationDate != null) {
363 setCreationDate(annotationInstance, creationDate);
364 }
365
366 return annotationInstance;
367 }
368
369 /**
370 * This method returns true if two annotations have exactly the same spans.
371 */
372 public boolean compareSpans(SimpleInstance annotation1, SimpleInstance annotation2) {
373 try {
374 java.util.List<Span> spans1 = getSpans(annotation1);
375 java.util.List<Span> spans2 = getSpans(annotation2);
376
377 return Span.spansMatch(spans1, spans2);
378 } catch (InvalidSpanException ise) {
379 return false;
380 }
381 }
382
383 public boolean compareSpans(List<SimpleInstance> annotations) {
384 for (int i = 1; i < annotations.size(); i++) {
385 if (!compareSpans(annotations.get(0), annotations.get(i)))
386 return false;
387 }
388 return true;
389 }
390
391 public boolean isAnnotation(SimpleInstance annotation) {
392 if (annotation == null)
393 return false;
394
395 Cls type = annotation.getDirectType();
396 if (type == null)
397 return false;
398 if (type.equals(kpu.getAnnotationCls())) {
399 return true;
400 }
401 return type.getSuperclasses().contains(kpu.getAnnotationCls());
402 }
403
404 public boolean hasTeamAnnotator(SimpleInstance annotation) {
405 SimpleInstance annotator = getAnnotator(annotation);
406 if (annotator == null)
407 return false;
408 Cls annotatorType = annotator.getDirectType();
409 if (annotatorType == null)
410 return false;
411 if (annotatorType.equals(kpu.getTeamAnnotatorCls()))
412 return true;
413 return annotatorType.getSuperclasses().contains(kpu.getTeamAnnotatorCls());
414
415 }
416
417 public void setProjectAnnotator(SimpleInstance annotation) {
418 if (isAnnotation(annotation)) {
419 setAnnotator(annotation, manager.getSelectedAnnotator());
420 }
421 }
422
423 public void setProjectAnnotationSet(SimpleInstance annotation) {
424 if (isAnnotation(annotation)) {
425 setSet(annotation, manager.getSelectedAnnotationSet());
426 }
427 }
428
429 /**
430 * Returns all annotations that are related to the passed in annotation. If
431 * an annotation is the slot value of another annotation (or more correctly
432 * if the mention of an annotation is a slot mention's value of another
433 * annotation's mention).
434 *
435 * This method recursively gathers all annotations that are related (not
436 * just directly related).
437 */
438
439 public Set<SimpleInstance> getRelatedAnnotations(SimpleInstance annotation) {
440 Set<SimpleInstance> referencedAnnotations = new HashSet<SimpleInstance>();
441 _getRelatedAnnotations(annotation, referencedAnnotations);
442 return referencedAnnotations;
443 }
444
445 private void _getRelatedAnnotations(SimpleInstance annotation, Set<SimpleInstance> referencedAnnotations) {
446 SimpleInstance mentionInstance = getMention(annotation);
447 if (mentionInstance != null) {
448 List<SimpleInstance> referencedMentions = mentionUtil.getRelatedMentions(mentionInstance);
449 for (SimpleInstance referencedMention : referencedMentions) {
450 SimpleInstance ann = mentionUtil.getMentionAnnotation(referencedMention);
451 if (!referencedAnnotations.contains(ann)) {
452 referencedAnnotations.add(ann);
453 _getRelatedAnnotations(ann, referencedAnnotations);
454 }
455 }
456 }
457 }
458
459 public List<SimpleInstance> retrieveAllAnnotations() {
460 Collection<SimpleInstance> instances = ProtegeUtil.castSimpleInstances(kpu.getAnnotationCls().getInstances());
461 List<SimpleInstance> annotations = new ArrayList<SimpleInstance>(instances);
462 return annotations;
463 }
464
465 public void setAnnotator(SimpleInstance annotation, SimpleInstance annotator) {
466 if (isAnnotation(annotation) && annotator != null) {
467 annotation.setOwnSlotValue(kpu.getAnnotationAnnotatorSlot(), annotator);
468 }
469 }
470
471 public void setSet(SimpleInstance annotation, SimpleInstance set) {
472 Collection<SimpleInstance> sets = new ArrayList<SimpleInstance>();
473 sets.add(set);
474 setSets(annotation, sets);
475 }
476
477 public void setSets(SimpleInstance annotation, Collection<SimpleInstance> sets) {
478 if (sets == null) {
479 sets = Collections.emptyList();
480 }
481 if (isAnnotation(annotation)) {
482 annotation.setOwnSlotValues(kpu.getSetSlot(), sets);
483 }
484 }
485
486 public Set<SimpleInstance> getSets(SimpleInstance annotation) {
487 if (isAnnotation(annotation)) {
488 Collection<SimpleInstance> sets = ProtegeUtil.castSimpleInstances(annotation.getOwnSlotValues(kpu
489 .getSetSlot()));
490 if (sets != null) {
491 return new HashSet<SimpleInstance>(sets);
492 }
493 }
494 return Collections.emptySet();
495 }
496
497 public SimpleInstance getTextSource(SimpleInstance annotation) {
498 if (isAnnotation(annotation)) {
499 SimpleInstance textSource = (SimpleInstance) annotation.getOwnSlotValue(kpu.getAnnotationTextSourceSlot());
500 return textSource;
501 }
502 return null;
503 }
504
505 public void setTextSource(SimpleInstance annotation, SimpleInstance textSource) {
506 if (isAnnotation(annotation)) {
507 annotation.setOwnSlotValue(kpu.getAnnotationTextSourceSlot(), textSource);
508 }
509 }
510
511 public String getText(SimpleInstance annotation) {
512 if (isAnnotation(annotation)) {
513 return (String) annotation.getOwnSlotValue(kpu.getAnnotationTextSlot());
514 }
515 return null;
516 }
517
518 public void setText(SimpleInstance annotation, String spannedText) {
519 if (isAnnotation(annotation)) {
520 annotation.setOwnSlotValue(kpu.getAnnotationTextSlot(), spannedText);
521 }
522 }
523
524 public boolean spansOverlap(SimpleInstance annotation1, SimpleInstance annotation2) {
525 try {
526 List<Span> spans1 = getSpans(annotation1);
527 List<Span> spans2 = getSpans(annotation2);
528 return Span.intersects(spans1, spans2);
529 } catch (InvalidSpanException ise) {
530 return false;
531 }
532 }
533
534 /**
535 * @return the size of the span associated with the annotation. If the
536 * annotation has more than one span, then the sum of the size of
537 * the spans is returned.
538 * @throws InvalidSpanException
539 * if the annotation has a badly formed span, then an exception
540 * will be thrown.
541 */
542
543 public int getSize(SimpleInstance annotation) throws InvalidSpanException {
544 List<Span> spans = getSpans(annotation);
545 int size = 0;
546 for (Span span : spans) {
547 size += span.getSize();
548 }
549 return size;
550 }
551
552 /**
553 * This method returns the shortest annotation - that is the annotation
554 * whose span is the shortest. If an annotation has more than one span, then
555 * its size is the sum of the size of each of its spans.
556 *
557 * @param annotations
558 * @return will only return one annotation. In the case of a tie, will
559 * return the first annotation with the smallest size encountered
560 * during iteration. Returns null if annotation.size() == 0 or if
561 * each of the annotations has poorly formed spans (very unlikely).
562 */
563
564 public SimpleInstance getShortestAnnotation(Collection<SimpleInstance> annotations) {
565 if (annotations.size() == 0)
566 return null;
567
568 SimpleInstance shortestAnnotation = null;
569 int shortestAnnotationLength = -1;
570
571 for (SimpleInstance annotation : annotations) {
572 try {
573 int annotationSize = getSize(annotation);
574 if (shortestAnnotationLength == -1 || annotationSize < shortestAnnotationLength) {
575 shortestAnnotation = annotation;
576 shortestAnnotationLength = annotationSize;
577 }
578 } catch (InvalidSpanException ise) {
579 continue;
580 }
581 }
582 return shortestAnnotation;
583 }
584
585 public SimpleInstance getMention(SimpleInstance annotation) {
586 if (isAnnotation(annotation)) {
587 SimpleInstance mention = (SimpleInstance) annotation.getOwnSlotValue(kpu.getAnnotatedMentionSlot());
588 return mention;
589 }
590 return null;
591 }
592
593 public void setMention(SimpleInstance annotation, SimpleInstance mention) {
594 if (isAnnotation(annotation) && mentionUtil.isMention(mention)) {
595 annotation.setOwnSlotValue(kpu.getAnnotatedMentionSlot(), mention);
596 }
597 }
598
599 public SimpleInstance getAnnotator(SimpleInstance annotation) {
600 if (isAnnotation(annotation)) {
601 return (SimpleInstance) annotation.getOwnSlotValue(kpu.getAnnotationAnnotatorSlot());
602 }
603 return null;
604 }
605
606 public String getComment(SimpleInstance annotation) {
607 if (isAnnotation(annotation)) {
608 return (String) annotation.getOwnSlotValue(kpu.getAnnotationCommentSlot());
609 }
610 return null;
611 }
612
613 public void setComment(SimpleInstance annotation, String comment) {
614 if (isAnnotation(annotation)) {
615 annotation.setOwnSlotValue(kpu.getAnnotationCommentSlot(), comment);
616 }
617 }
618
619 public String getCreationDate(SimpleInstance annotation) {
620 if (isAnnotation(annotation)) {
621 return (String) annotation.getOwnSlotValue(kpu.getAnnotationCreationDateSlot());
622 }
623 return null;
624 }
625
626 public void setCreationDate(SimpleInstance annotation, String creationDate) {
627 if (isAnnotation(annotation)) {
628 annotation.setOwnSlotValue(kpu.getAnnotationCreationDateSlot(), creationDate);
629 }
630 }
631
632 }