001 /*
002 * The contents of this file are subject to the Mozilla Public
003 * License Version 1.1 (the "License"); you may not use this file
004 * except in compliance with the License. You may obtain a copy of
005 * the License at http://www.mozilla.org/MPL/
006 *
007 * Software distributed under the License is distributed on an "AS
008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009 * implied. See the License for the specific language governing
010 * rights and limitations under the License.
011 *
012 * The Original Code is Knowtator.
013 *
014 * The Initial Developer of the Original Code is University of Colorado.
015 * Copyright (C) 2005 - 2008. All Rights Reserved.
016 *
017 * Knowtator was developed by the Center for Computational Pharmacology
018 * (http://compbio.uchcs.edu) at the University of Colorado Health
019 * Sciences Center School of Medicine with support from the National
020 * Library of Medicine.
021 *
022 * Current information about Knowtator can be obtained at
023 * http://knowtator.sourceforge.net/
024 *
025 * Contributor(s):
026 * Philip V. Ogren <philip@ogren.info> (Original Author)
027 */
028
029 package edu.uchsc.ccp.knowtator;
030
031 import java.util.ArrayList;
032 import java.util.Collections;
033 import java.util.Comparator;
034 import java.util.List;
035 import java.util.Set;
036 import java.util.regex.Matcher;
037 import java.util.regex.Pattern;
038
039 import edu.stanford.smi.protege.model.SimpleInstance;
040 import edu.uchsc.ccp.knowtator.event.EventHandler;
041 import edu.uchsc.ccp.knowtator.textsource.TextSource;
042 import edu.uchsc.ccp.knowtator.textsource.TextSourceAccessException;
043 import edu.uchsc.ccp.knowtator.textsource.TextSourceChangeEvent;
044 import edu.uchsc.ccp.knowtator.textsource.TextSourceChangeListener;
045
046 public class SpanUtil implements TextSourceChangeListener {
047 public static final String GROW_ANNOTATION_LEFT = "GROW_ANNOTATION_LEFT";
048
049 public static final String GROW_ANNOTATION_LEFT_WORD = "GROW_ANNOTATION_LEFT_WORD";
050
051 public static final String GROW_ANNOTATION_RIGHT = "GROW_ANNOTATION_RIGHT";
052
053 public static final String GROW_ANNOTATION_RIGHT_WORD = "GROW_ANNOTATION_RIGHT_WORD";
054
055 public static final String SHRINK_ANNOTATION_LEFT = "SHRINK_ANNOTATION_LEFT";
056
057 public static final String SHRINK_ANNOTATION_LEFT_WORD = "SHRINK_ANNOTATION_LEFT_WORD";
058
059 public static final String SHRINK_ANNOTATION_RIGHT = "SHRINK_ANNOTATION_RIGHT";
060
061 public static final String SHRINK_ANNOTATION_RIGHT_WORD = "SHRINK_ANNOTATION_RIGHT_WORD";
062
063 TextSource textSource;
064
065 KnowtatorManager manager;
066
067 KnowtatorProjectUtil kpu;
068
069 AnnotationUtil annotationUtil;
070
071 /** Creates a new instance of SpanUtil */
072 public SpanUtil(KnowtatorManager manager) {
073 this.manager = manager;
074 this.kpu = manager.getKnowtatorProjectUtil();
075 this.annotationUtil = manager.getAnnotationUtil();
076 }
077
078 public void textSourceChanged(TextSourceChangeEvent event) {
079 textSource = event.getTextSource();
080 }
081
082 public boolean canGrowSpanRight(Span span) {
083 try {
084 if (Span.isValid(span.getStart(), span.getEnd() + 1)) {
085 if (span.getEnd() + 1 <= textSource.getText().length()) {
086 return true;
087 }
088 }
089 return false;
090 } catch (Exception exception) {
091 return false;
092 }
093 }
094
095 public boolean canShrinkSpanRight(Span span) {
096 return Span.isValid(span.getStart(), span.getEnd() - 1);
097 }
098
099 public boolean canGrowSpanLeft(Span span) {
100 return Span.isValid(span.getStart() - 1, span.getEnd());
101 }
102
103 public boolean canShrinkSpanLeft(Span span) {
104 return Span.isValid(span.getStart() + 1, span.getEnd());
105 }
106
107 private void growSpanRight(Span span, SimpleInstance annotation) throws InvalidSpanException,
108 TextSourceAccessException {
109 if (canGrowSpanRight(span))
110 editSpan(span, new Span(span.getStart(), span.getEnd() + 1), annotation);
111 }
112
113 private void growSpanRightWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
114 TextSourceAccessException {
115 String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
116 Span superspan = expandToSplits(text, new Span(span.getStart(), span.getEnd() + 2), 30, 30, Pattern
117 .compile(manager.getTokenRegex()));
118 if (superspan != null) {
119 if (superspan.getEnd() == span.getEnd())
120 growSpanRight(span, annotation);
121 else if (superspan.getEnd() > span.getEnd()) {
122 editSpan(span, new Span(span.getStart(), superspan.getEnd()), annotation);
123 }
124 }
125 }
126
127 private void shrinkSpanRight(Span span, SimpleInstance annotation) throws InvalidSpanException,
128 TextSourceAccessException {
129 if (canShrinkSpanRight(span))
130 editSpan(span, new Span(span.getStart(), span.getEnd() - 1), annotation);
131 }
132
133 private void shrinkSpanRightWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
134 TextSourceAccessException {
135 String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
136 Span subspan = shrinkRight(text, span, Pattern.compile(manager.getTokenRegex()));
137 if (subspan != null) {
138 if (subspan.getEnd() == span.getEnd())
139 shrinkSpanRight(span, annotation);
140 else if (subspan.getEnd() < span.getEnd()) {
141 editSpan(span, subspan, annotation);
142 }
143 }
144 }
145
146 private void shrinkSpanLeftWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
147 TextSourceAccessException {
148 String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
149 Span subspan = shrinkLeft(text, span, Pattern.compile(manager.getTokenRegex()));
150 if (subspan != null) {
151 if (subspan.getStart() == span.getStart())
152 shrinkSpanLeft(span, annotation);
153 else if (subspan.getStart() > span.getStart()) {
154 editSpan(span, subspan, annotation);
155 }
156 }
157 }
158
159 private void growSpanLeft(Span span, SimpleInstance annotation) throws InvalidSpanException,
160 TextSourceAccessException {
161 if (canGrowSpanLeft(span))
162 editSpan(span, new Span(span.getStart() - 1, span.getEnd()), annotation);
163 }
164
165 private void growSpanLeftWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
166 TextSourceAccessException {
167 if (span.getStart() == 0)
168 return;
169 String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
170
171 Span superspan = expandToSplits(text, new Span(span.getStart() - 2, span.getEnd()), 30, 30, Pattern
172 .compile(manager.getTokenRegex()));
173 if (superspan != null) {
174 if (superspan.getStart() == span.getStart())
175 growSpanLeft(span, annotation);
176 else if (superspan.getStart() < span.getStart() && superspan.getStart() >= 0) {
177 editSpan(span, new Span(superspan.getStart(), span.getEnd()), annotation);
178 }
179 }
180 }
181
182 private void shrinkSpanLeft(Span span, SimpleInstance annotation) throws InvalidSpanException,
183 TextSourceAccessException {
184 if (canShrinkSpanLeft(span))
185 editSpan(span, new Span(span.getStart() + 1, span.getEnd()), annotation);
186 }
187
188 /**
189 * The strategy for updating a span is to get the Span objects for the
190 * annotationInstance loop through them and find the one that matches
191 * 'oldSpan' and update that span with the 'newSpan' in the list of spans
192 * returned from annotationUtil.getSpans. The list of spans are then sent
193 * back to annotationUtil.setSpans.
194 */
195
196 private void editSpan(Span oldSpan, Span newSpan, SimpleInstance annotation) throws InvalidSpanException,
197 TextSourceAccessException {
198 List<Span> spans = new ArrayList<Span>(annotationUtil.getSpans(annotation));
199 if (spans.size() == 0) {
200 System.out.println("empty span list");
201 }
202 for (int i = 0; i < spans.size(); i++) {
203 Span annotationSpan = spans.get(i);
204 if (annotationSpan.equals(oldSpan)) {
205 spans.set(i, newSpan);
206 if (newSpan.getStart() == newSpan.getEnd())
207 spans.remove(i);
208 break;
209 }
210 }
211 annotationUtil.setSpans(annotation, spans, null);
212 manager.refreshAnnotationsDisplay(true);
213 }
214
215 public void editSpans(List<Span> spans, SimpleInstance annotation, String editType) throws InvalidSpanException,
216 TextSourceAccessException {
217 for (Span span : spans) {
218 if (editType.equals(GROW_ANNOTATION_LEFT))
219 growSpanLeft(span, annotation);
220 if (editType.equals(GROW_ANNOTATION_LEFT_WORD))
221 growSpanLeftWord(span, annotation);
222 else if (editType.equals(GROW_ANNOTATION_RIGHT))
223 growSpanRight(span, annotation);
224 else if (editType.equals(GROW_ANNOTATION_RIGHT_WORD))
225 growSpanRightWord(span, annotation);
226 else if (editType.equals(SHRINK_ANNOTATION_LEFT))
227 shrinkSpanLeft(span, annotation);
228 else if (editType.equals(SHRINK_ANNOTATION_LEFT_WORD))
229 shrinkSpanLeftWord(span, annotation);
230 else if (editType.equals(SHRINK_ANNOTATION_RIGHT))
231 shrinkSpanRight(span, annotation);
232 else if (editType.equals(SHRINK_ANNOTATION_RIGHT_WORD))
233 shrinkSpanRightWord(span, annotation);
234 }
235 EventHandler.getInstance().fireSpanEditted(annotation);
236
237 }
238
239 /**
240 * The purpose of this method is to "expand" the substringSpan such that the
241 * start and end of the returned span are at splits in the string.
242 *
243 * @param superString
244 * the string that is being looked at
245 * @param substringSpan
246 * a span that designates some arbitrary substring of the
247 * superString
248 * @param frontWindowSize
249 * the string that immediately precedes the substring is
250 * considered the frontwindow. This parameter sets the maximum
251 * size of the frontwindow (it may be smaller if the superstring
252 * doesn't have enough text that precedes the substring.)
253 * @param rearWindowSize
254 * the string that immediately follows the substring is
255 * considered the rearwindow. This parameter sets the maximum
256 * size of the rearwindow (it may be smaller if the superstring
257 * doesn't have enough text that follows the substring.)
258 * @param splitPattern
259 * a regular expression that defines a "split" or a word
260 * boundary. For example, you might pass in Pattern.compile(\\W+)
261 * to define a boundary as non-word characters.
262 * @return a span that is "expanded" to start and end at word boundaries.
263 * See the associated unit tests for examples. Typically the
264 * substring associated with the returned span will be larger than
265 * the substring associated with the substringSpan parameter.
266 * However, if the substring associated with the substringSpan
267 * begins or ends with the splitPattern, then the returned substring
268 * may be shorter.
269 *
270 */
271 public static Span expandToSplits(String superString, Span substringSpan, int frontWindowSize, int rearWindowSize,
272 Pattern splitPattern) {
273 try {
274 if (!(new Span(0, superString.length()).contains(substringSpan))) {
275 return null;
276 }
277
278 String substring = superString.substring(substringSpan.getStart(), substringSpan.getEnd());
279
280 // First we will determine the start of the returned span
281 // If the substring starts with the splitPattern then we will move
282 // the start
283 // to the "right". Otherwise we will look for the last splitPattern
284 // in the
285 // front window.
286 int returnSpanStart = substringSpan.getStart();
287
288 // shrink the span start if substring begins with splitPattern
289 boolean shrinkSpanStart = false;
290 Matcher matcher = splitPattern.matcher(substring);
291 if (matcher.find()) {
292 if (matcher.start() == 0) {
293 returnSpanStart += matcher.end();
294 shrinkSpanStart = true;
295 }
296 }
297
298 // if substring does not begin with splitPattern, then find last
299 // splitPattern
300 // in frontWindow
301 if (!shrinkSpanStart) {
302 // The front window starts at either 0 or frontWindowSize from
303 // the substringSpan start
304 int frontWindowStart = Math.max(0, substringSpan.getStart() - frontWindowSize);
305 String frontWindow = superString.substring(frontWindowStart, substringSpan.getStart());
306 matcher = splitPattern.matcher(frontWindow);
307 int lastMatch = -1;
308 if (frontWindowStart == 0) {
309 lastMatch = 0;
310 }
311 while (matcher.find()) {
312 lastMatch = matcher.end();
313 }
314
315 if (lastMatch > -1) {
316 returnSpanStart -= frontWindow.substring(lastMatch).length();
317 }
318 }
319
320 // now we figure out the end of the returned span
321 // If the substring ends with the splitPattern then we will move the
322 // end
323 // to the "left". Otherwise we will look for the last splitPattern
324 // in the
325 // front window.
326 int returnSpanEnd = substringSpan.getEnd();
327
328 // shrink the span end if substring ends with splitPattern
329 boolean shrinkSpanEnd = false;
330 StringBuffer reverseStringBuffer = new StringBuffer(substring).reverse();
331 String reverseSubstring = reverseStringBuffer.toString();
332
333 matcher = splitPattern.matcher(reverseSubstring);
334 if (matcher.find()) {
335 if (matcher.start() == 0) {
336 returnSpanEnd -= matcher.end();
337 shrinkSpanEnd = true;
338 }
339 }
340
341 if (!shrinkSpanEnd) {
342 int rearWindowEnd = Math.min(superString.length(), substringSpan.getEnd() + rearWindowSize);
343 String rearWindow = superString.substring(substringSpan.getEnd(), rearWindowEnd);
344 matcher = splitPattern.matcher(rearWindow);
345 if (matcher.find()) {
346 if (matcher.start() != 0) {
347 returnSpanEnd = returnSpanEnd + matcher.start();
348 }
349 } else if (rearWindowEnd == superString.length()) {
350 returnSpanEnd = rearWindowEnd;
351 }
352
353 }
354 return new Span(returnSpanStart, returnSpanEnd);
355 } catch (InvalidSpanException ise) {
356 return substringSpan;
357 }
358
359 }
360
361 public static Span shrinkRight(String superString, Span span, Pattern splitPattern) {
362 if (!(new Span(0, superString.length()).contains(span))) {
363 return null;
364 }
365
366 String spannedString = Span.substring(superString, span);
367 Matcher matcher = splitPattern.matcher(spannedString);
368 int lastIndex = -1;
369 while (matcher.find()) {
370 lastIndex = matcher.start();
371 }
372
373 if (lastIndex == -1)
374 return span;
375
376 lastIndex = span.getStart() + lastIndex;
377 if (lastIndex > span.getStart() && lastIndex < span.getEnd())
378 return new Span(span.getStart(), lastIndex);
379
380 return span;
381 }
382
383 public static Span shrinkLeft(String superString, Span span, Pattern splitPattern) {
384 if (!(new Span(0, superString.length()).contains(span))) {
385 return null;
386 }
387
388 String spannedString = Span.substring(superString, span);
389 Matcher matcher = splitPattern.matcher(spannedString);
390 int lastIndex = -1;
391 if (matcher.find()) {
392 lastIndex = matcher.end();
393 }
394
395 if (lastIndex == -1)
396 return span;
397
398 lastIndex = span.getStart() + lastIndex;
399 if (lastIndex > span.getStart() && lastIndex < span.getEnd())
400 return new Span(lastIndex, span.getEnd());
401
402 return span;
403 }
404
405 public Comparator<SimpleInstance> comparator(final Comparator<SimpleInstance> noSpansComparator) {
406 return new Comparator<SimpleInstance>() {
407 public int compare(SimpleInstance annotation1, SimpleInstance annotation2) {
408 List<Span> spans1;
409 try {
410 spans1 = new ArrayList<Span>(annotationUtil.getSpans(annotation1));
411 if (spans1.size() == 0) {
412 Span referencedSpan = getAReferencedSpan(annotation1);
413 if (referencedSpan != null)
414 spans1.add(referencedSpan);
415 }
416 } catch (InvalidSpanException ise) {
417 spans1 = new ArrayList<Span>();
418 }
419 List<Span> spans2;
420 try {
421 spans2 = new ArrayList<Span>(annotationUtil.getSpans(annotation2));
422 if (spans2.size() == 0) {
423 Span referencedSpan = getAReferencedSpan(annotation2);
424 if (referencedSpan != null)
425 spans2.add(referencedSpan);
426 }
427
428 } catch (InvalidSpanException ise) {
429 spans2 = new ArrayList<Span>();
430 }
431 if (spans1.size() == 0 && spans2.size() == 0 && noSpansComparator != null) {
432 return noSpansComparator.compare(annotation1, annotation2);
433 } else if (spans1.size() == 0) {
434 return 1;
435 } else if (spans2.size() == 0) {
436 return -1;
437 } else {
438 int comparison = spans1.get(0).compareTo(spans2.get(0));
439 if (comparison == 0 && noSpansComparator != null) {
440 return noSpansComparator.compare(annotation1, annotation2);
441 } else
442 return comparison;
443 }
444 }
445 };
446 }
447
448 public Comparator lengthComparator() {
449 return new Comparator<SimpleInstance>() {
450 public int compare(SimpleInstance annotation1, SimpleInstance annotation2) {
451 List<Span> spans1 = annotationUtil.getSpans(annotation1);
452 List<Span> spans2 = annotationUtil.getSpans(annotation2);
453 if (spans1.size() == 0) {
454 return -1;
455 } else if (spans2.size() == 0) {
456 return 1;
457 } else {
458 int length1 = spansLength(spans1);
459 int length2 = spansLength(spans2);
460
461 if (length1 <= length2)
462 return -1;
463 else
464 return 1;
465 }
466 }
467 };
468 }
469
470 public int spansLength(List<Span> spans) {
471 int length = 0;
472 for (Span span : spans) {
473 length += span.getSize();
474 }
475 return length;
476 }
477
478 public Span getAReferencedSpan(SimpleInstance annotation) {
479 List<Span> returnValues = new ArrayList<Span>();
480 Set<SimpleInstance> referencedAnnotations = annotationUtil.getRelatedAnnotations(annotation);
481 for (SimpleInstance referencedAnnotation : referencedAnnotations) {
482 List<Span> spans = annotationUtil.getSpans(referencedAnnotation);
483 returnValues.addAll(spans);
484
485 }
486 if (returnValues.size() > 0) {
487 Collections.sort(returnValues);
488 return returnValues.get(0);
489 }
490 return null;
491 }
492
493 /**
494 *
495 * @param annotation
496 * @return the span of the annotation that has the smallest start index. If
497 * no span exists for the annotation, then it will return the span
498 * with the smallest start index from all of the spans of all of the
499 * annotations (recursively) related annotations
500 */
501 public Span getFirstSpan(SimpleInstance annotation) {
502 List<Span> spans1 = annotationUtil.getSpans(annotation);
503 if (spans1 == null || spans1.size() == 0) {
504 return manager.getSpanUtil().getAReferencedSpan(annotation);
505 } else {
506 Collections.sort(spans1);
507 return spans1.get(0);
508 }
509 }
510 }