/*
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Knowtator.
 *
 * The Initial Developer of the Original Code is University of Colorado.
 * Copyright (C) 2005 - 2008. All Rights Reserved.
 *
 * Knowtator was developed by the Center for Computational Pharmacology
 * (http://compbio.uchcs.edu) at the University of Colorado Health
 * Sciences Center School of Medicine with support from the National
 * Library of Medicine.
 *
 * Current information about Knowtator can be obtained at
 * http://knowtator.sourceforge.net/
 *
 * Contributor(s):
 * Philip V. Ogren <philip@ogren.info> (Original Author)
 */

package edu.uchsc.ccp.knowtator;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.stanford.smi.protege.model.SimpleInstance;
import edu.uchsc.ccp.knowtator.event.EventHandler;
import edu.uchsc.ccp.knowtator.textsource.TextSource;
import edu.uchsc.ccp.knowtator.textsource.TextSourceAccessException;
import edu.uchsc.ccp.knowtator.textsource.TextSourceChangeEvent;
import edu.uchsc.ccp.knowtator.textsource.TextSourceChangeListener;

/**
 * Helper for growing and shrinking the spans of an annotation (by one
 * character or to the next token boundary) and for ordering annotations by
 * their spans.
 */
public class SpanUtil implements TextSourceChangeListener {
    public static final String GROW_ANNOTATION_LEFT = "GROW_ANNOTATION_LEFT";

    public static final String GROW_ANNOTATION_LEFT_WORD = "GROW_ANNOTATION_LEFT_WORD";

    public static final String GROW_ANNOTATION_RIGHT = "GROW_ANNOTATION_RIGHT";

    public static final String GROW_ANNOTATION_RIGHT_WORD = "GROW_ANNOTATION_RIGHT_WORD";

    public static final String SHRINK_ANNOTATION_LEFT = "SHRINK_ANNOTATION_LEFT";

    public static final String SHRINK_ANNOTATION_LEFT_WORD = "SHRINK_ANNOTATION_LEFT_WORD";

    public static final String SHRINK_ANNOTATION_RIGHT = "SHRINK_ANNOTATION_RIGHT";

    public static final String SHRINK_ANNOTATION_RIGHT_WORD = "SHRINK_ANNOTATION_RIGHT_WORD";

    /** Text source most recently delivered through {@link #textSourceChanged}. */
    TextSource textSource;

    KnowtatorManager manager;

    KnowtatorProjectUtil kpu;

    AnnotationUtil annotationUtil;

    /**
     * Creates a new instance of SpanUtil.
     *
     * @param manager
     *            supplies the project util and annotation util used by this
     *            instance
     */
    public SpanUtil(KnowtatorManager manager) {
        this.manager = manager;
        this.kpu = manager.getKnowtatorProjectUtil();
        this.annotationUtil = manager.getAnnotationUtil();
    }

    /**
     * Remembers the text source carried by the event; it is consulted by
     * {@link #canGrowSpanRight(Span)}.
     */
    public void textSourceChanged(TextSourceChangeEvent event) {
        textSource = event.getTextSource();
    }

    /**
     * @return true if the span can be extended by one character on the right
     *         without becoming invalid or running past the end of the current
     *         text; false otherwise, including when the check itself fails
     *         (for example because no text source has been set yet).
     */
    public boolean canGrowSpanRight(Span span) {
        try {
            int grownEnd = span.getEnd() + 1;
            return Span.isValid(span.getStart(), grownEnd) && grownEnd <= textSource.getText().length();
        } catch (Exception exception) {
            // Deliberately best-effort: any failure while consulting the text
            // source is treated as "cannot grow".
            return false;
        }
    }

    /** @return true if removing one character from the right leaves a valid span. */
    public boolean canShrinkSpanRight(Span span) {
        return Span.isValid(span.getStart(), span.getEnd() - 1);
    }

    /** @return true if adding one character on the left yields a valid span. */
    public boolean canGrowSpanLeft(Span span) {
        return Span.isValid(span.getStart() - 1, span.getEnd());
    }

    /** @return true if removing one character from the left leaves a valid span. */
    public boolean canShrinkSpanLeft(Span span) {
        return Span.isValid(span.getStart() + 1, span.getEnd());
    }

    /** Extends the span one character to the right, if that is possible. */
    private void growSpanRight(Span span, SimpleInstance annotation) throws InvalidSpanException,
            TextSourceAccessException {
        if (canGrowSpanRight(span)) {
            editSpan(span, new Span(span.getStart(), span.getEnd() + 1), annotation);
        }
    }

    /**
     * Extends the span on the right to the next token boundary. Falls back to
     * a single-character grow when the boundary search does not move the end.
     */
    private void growSpanRightWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
            TextSourceAccessException {
        String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
        if (span.getEnd() >= text.length()) {
            // Nothing to the right of the span; mirrors the start == 0 guard
            // in growSpanLeftWord.
            return;
        }
        // Probe two characters to the right, but never past the end of the
        // text: an out-of-range probe makes expandToSplits return null, which
        // previously made it impossible to grow over the last character.
        int probeEnd = Math.min(text.length(), span.getEnd() + 2);
        Span superspan = expandToSplits(text, new Span(span.getStart(), probeEnd), 30, 30, Pattern
                .compile(manager.getTokenRegex()));
        if (superspan != null) {
            if (superspan.getEnd() == span.getEnd()) {
                growSpanRight(span, annotation);
            } else if (superspan.getEnd() > span.getEnd()) {
                editSpan(span, new Span(span.getStart(), superspan.getEnd()), annotation);
            }
        }
    }

    /** Removes one character from the right of the span, if that is possible. */
    private void shrinkSpanRight(Span span, SimpleInstance annotation) throws InvalidSpanException,
            TextSourceAccessException {
        if (canShrinkSpanRight(span)) {
            editSpan(span, new Span(span.getStart(), span.getEnd() - 1), annotation);
        }
    }

    /**
     * Shrinks the span on the right to the last token boundary inside it.
     * Falls back to a single-character shrink when no such boundary is found.
     */
    private void shrinkSpanRightWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
            TextSourceAccessException {
        String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
        Span subspan = shrinkRight(text, span, Pattern.compile(manager.getTokenRegex()));
        if (subspan != null) {
            if (subspan.getEnd() == span.getEnd()) {
                shrinkSpanRight(span, annotation);
            } else if (subspan.getEnd() < span.getEnd()) {
                editSpan(span, subspan, annotation);
            }
        }
    }

    /**
     * Shrinks the span on the left to just after the first token boundary
     * inside it. Falls back to a single-character shrink when no such boundary
     * is found.
     */
    private void shrinkSpanLeftWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
            TextSourceAccessException {
        String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
        Span subspan = shrinkLeft(text, span, Pattern.compile(manager.getTokenRegex()));
        if (subspan != null) {
            if (subspan.getStart() == span.getStart()) {
                shrinkSpanLeft(span, annotation);
            } else if (subspan.getStart() > span.getStart()) {
                editSpan(span, subspan, annotation);
            }
        }
    }

    /** Extends the span one character to the left, if that is possible. */
    private void growSpanLeft(Span span, SimpleInstance annotation) throws InvalidSpanException,
            TextSourceAccessException {
        if (canGrowSpanLeft(span)) {
            editSpan(span, new Span(span.getStart() - 1, span.getEnd()), annotation);
        }
    }

    /**
     * Extends the span on the left to the previous token boundary. Falls back
     * to a single-character grow when the boundary search does not move the
     * start.
     */
    private void growSpanLeftWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
            TextSourceAccessException {
        if (span.getStart() == 0) {
            return;
        }
        String text = manager.getTextSourceUtil().getCurrentTextSource().getText();

        // Probe two characters to the left, but never before the start of the
        // text: with start == 1 the unclamped probe would begin at -1.
        int probeStart = Math.max(0, span.getStart() - 2);
        Span superspan = expandToSplits(text, new Span(probeStart, span.getEnd()), 30, 30, Pattern
                .compile(manager.getTokenRegex()));
        if (superspan != null) {
            if (superspan.getStart() == span.getStart()) {
                growSpanLeft(span, annotation);
            } else if (superspan.getStart() < span.getStart() && superspan.getStart() >= 0) {
                editSpan(span, new Span(superspan.getStart(), span.getEnd()), annotation);
            }
        }
    }

    /** Removes one character from the left of the span, if that is possible. */
    private void shrinkSpanLeft(Span span, SimpleInstance annotation) throws InvalidSpanException,
            TextSourceAccessException {
        if (canShrinkSpanLeft(span)) {
            editSpan(span, new Span(span.getStart() + 1, span.getEnd()), annotation);
        }
    }

    /**
     * The strategy for updating a span is to get the Span objects for the
     * annotation, loop through them to find the one that equals 'oldSpan' and
     * replace it with 'newSpan' in a copy of the list returned from
     * annotationUtil.getSpans. If 'newSpan' is empty (start == end) the span
     * is dropped instead. The list is then sent back to
     * annotationUtil.setSpans and the annotation display is refreshed; both
     * happen even when 'oldSpan' was not found.
     */
    private void editSpan(Span oldSpan, Span newSpan, SimpleInstance annotation) throws InvalidSpanException,
            TextSourceAccessException {
        List<Span> spans = new ArrayList<Span>(annotationUtil.getSpans(annotation));
        if (spans.size() == 0) {
            System.out.println("empty span list");
        }
        for (int i = 0; i < spans.size(); i++) {
            if (spans.get(i).equals(oldSpan)) {
                if (newSpan.getStart() == newSpan.getEnd()) {
                    spans.remove(i);
                } else {
                    spans.set(i, newSpan);
                }
                // only the first matching span is edited
                break;
            }
        }
        annotationUtil.setSpans(annotation, spans, null);
        manager.refreshAnnotationsDisplay(true);
    }

    /**
     * Applies one edit operation to each of the given spans of an annotation
     * and then fires a span-edited event for the annotation.
     *
     * @param spans
     *            the spans to edit
     * @param annotation
     *            the annotation that owns the spans
     * @param editType
     *            one of the GROW_ANNOTATION_* / SHRINK_ANNOTATION_* constants
     *            of this class; any other value leaves the spans untouched
     */
    public void editSpans(List<Span> spans, SimpleInstance annotation, String editType) throws InvalidSpanException,
            TextSourceAccessException {
        for (Span span : spans) {
            if (editType.equals(GROW_ANNOTATION_LEFT)) {
                growSpanLeft(span, annotation);
            } else if (editType.equals(GROW_ANNOTATION_LEFT_WORD)) {
                growSpanLeftWord(span, annotation);
            } else if (editType.equals(GROW_ANNOTATION_RIGHT)) {
                growSpanRight(span, annotation);
            } else if (editType.equals(GROW_ANNOTATION_RIGHT_WORD)) {
                growSpanRightWord(span, annotation);
            } else if (editType.equals(SHRINK_ANNOTATION_LEFT)) {
                shrinkSpanLeft(span, annotation);
            } else if (editType.equals(SHRINK_ANNOTATION_LEFT_WORD)) {
                shrinkSpanLeftWord(span, annotation);
            } else if (editType.equals(SHRINK_ANNOTATION_RIGHT)) {
                shrinkSpanRight(span, annotation);
            } else if (editType.equals(SHRINK_ANNOTATION_RIGHT_WORD)) {
                shrinkSpanRightWord(span, annotation);
            }
        }
        EventHandler.getInstance().fireSpanEditted(annotation);
    }

    /**
     * The purpose of this method is to "expand" the substringSpan such that the
     * start and end of the returned span are at splits in the string.
     *
     * @param superString
     *            the string that is being looked at
     * @param substringSpan
     *            a span that designates some arbitrary substring of the
     *            superString
     * @param frontWindowSize
     *            the string that immediately precedes the substring is
     *            considered the frontwindow. This parameter sets the maximum
     *            size of the frontwindow (it may be smaller if the superstring
     *            doesn't have enough text that precedes the substring.)
     * @param rearWindowSize
     *            the string that immediately follows the substring is
     *            considered the rearwindow. This parameter sets the maximum
     *            size of the rearwindow (it may be smaller if the superstring
     *            doesn't have enough text that follows the substring.)
     * @param splitPattern
     *            a regular expression that defines a "split" or a word
     *            boundary. For example, you might pass in
     *            Pattern.compile("\\W+") to define a boundary as non-word
     *            characters.
     * @return a span that is "expanded" to start and end at word boundaries.
     *         See the associated unit tests for examples. Typically the
     *         substring associated with the returned span will be larger than
     *         the substring associated with the substringSpan parameter.
     *         However, if the substring associated with the substringSpan
     *         begins or ends with the splitPattern, then the returned substring
     *         may be shorter. Returns null if substringSpan does not lie within
     *         superString, and returns substringSpan itself if an
     *         InvalidSpanException is raised while computing the result.
     */
    public static Span expandToSplits(String superString, Span substringSpan, int frontWindowSize, int rearWindowSize,
            Pattern splitPattern) {
        try {
            if (!(new Span(0, superString.length()).contains(substringSpan))) {
                return null;
            }

            String substring = superString.substring(substringSpan.getStart(), substringSpan.getEnd());

            // First determine the start of the returned span. If the
            // substring starts with the splitPattern the start moves to the
            // "right", past that match. Otherwise the start moves "left" to
            // just after the last splitPattern match in the front window.
            int returnSpanStart = substringSpan.getStart();
            Matcher matcher = splitPattern.matcher(substring);
            boolean shrinkSpanStart = matcher.find() && matcher.start() == 0;
            if (shrinkSpanStart) {
                returnSpanStart += matcher.end();
            } else {
                // The front window starts at either 0 or frontWindowSize
                // characters before the substringSpan start.
                int frontWindowStart = Math.max(0, substringSpan.getStart() - frontWindowSize);
                String frontWindow = superString.substring(frontWindowStart, substringSpan.getStart());
                // When the window reaches the beginning of the text, the
                // beginning itself counts as a boundary.
                int lastMatch = (frontWindowStart == 0) ? 0 : -1;
                matcher = splitPattern.matcher(frontWindow);
                while (matcher.find()) {
                    lastMatch = matcher.end();
                }
                if (lastMatch > -1) {
                    returnSpanStart -= frontWindow.length() - lastMatch;
                }
            }

            // Now determine the end of the returned span. If the substring
            // ends with the splitPattern the end moves to the "left".
            // Otherwise the end moves "right" to the first splitPattern match
            // in the rear window.
            int returnSpanEnd = substringSpan.getEnd();
            // "Ends with" is tested by matching against the reversed substring.
            String reverseSubstring = new StringBuilder(substring).reverse().toString();
            matcher = splitPattern.matcher(reverseSubstring);
            boolean shrinkSpanEnd = matcher.find() && matcher.start() == 0;
            if (shrinkSpanEnd) {
                returnSpanEnd -= matcher.end();
            } else {
                int rearWindowEnd = Math.min(superString.length(), substringSpan.getEnd() + rearWindowSize);
                String rearWindow = superString.substring(substringSpan.getEnd(), rearWindowEnd);
                matcher = splitPattern.matcher(rearWindow);
                if (matcher.find()) {
                    returnSpanEnd += matcher.start();
                } else if (rearWindowEnd == superString.length()) {
                    // No boundary before the end of the text: the end of the
                    // text is the boundary.
                    returnSpanEnd = rearWindowEnd;
                }
            }
            return new Span(returnSpanStart, returnSpanEnd);
        } catch (InvalidSpanException ise) {
            return substringSpan;
        }
    }

    /**
     * Shrinks a span on the right so that it ends where the last match of the
     * split pattern inside the spanned text begins.
     *
     * @param superString
     *            the text the span refers to
     * @param span
     *            the span to shrink
     * @param splitPattern
     *            regular expression that defines a boundary
     * @return null if the span does not lie within superString; otherwise the
     *         shrunken span, or the original span when the spanned text
     *         contains no match or the last match starts at the very
     *         beginning of the span.
     */
    public static Span shrinkRight(String superString, Span span, Pattern splitPattern) {
        if (!(new Span(0, superString.length()).contains(span))) {
            return null;
        }

        String spannedString = Span.substring(superString, span);
        Matcher matcher = splitPattern.matcher(spannedString);
        int lastIndex = -1;
        while (matcher.find()) {
            lastIndex = matcher.start();
        }

        if (lastIndex == -1) {
            return span;
        }

        // translate from an offset within the span to an offset within the text
        lastIndex = span.getStart() + lastIndex;
        if (lastIndex > span.getStart() && lastIndex < span.getEnd()) {
            return new Span(span.getStart(), lastIndex);
        }

        return span;
    }

    /**
     * Shrinks a span on the left so that it starts where the first match of
     * the split pattern inside the spanned text ends.
     *
     * @param superString
     *            the text the span refers to
     * @param span
     *            the span to shrink
     * @param splitPattern
     *            regular expression that defines a boundary
     * @return null if the span does not lie within superString; otherwise the
     *         shrunken span, or the original span when the spanned text
     *         contains no match or the first match ends at either edge of the
     *         span.
     */
    public static Span shrinkLeft(String superString, Span span, Pattern splitPattern) {
        if (!(new Span(0, superString.length()).contains(span))) {
            return null;
        }

        String spannedString = Span.substring(superString, span);
        Matcher matcher = splitPattern.matcher(spannedString);
        int lastIndex = -1;
        if (matcher.find()) {
            lastIndex = matcher.end();
        }

        if (lastIndex == -1) {
            return span;
        }

        // translate from an offset within the span to an offset within the text
        lastIndex = span.getStart() + lastIndex;
        if (lastIndex > span.getStart() && lastIndex < span.getEnd()) {
            return new Span(lastIndex, span.getEnd());
        }

        return span;
    }

    /**
     * Collects the spans used to order an annotation: its own spans, or, when
     * it has none, a single span borrowed from its related annotations. An
     * InvalidSpanException yields an empty list.
     */
    private List<Span> spansForOrdering(SimpleInstance annotation) {
        try {
            List<Span> spans = new ArrayList<Span>(annotationUtil.getSpans(annotation));
            if (spans.size() == 0) {
                Span referencedSpan = getAReferencedSpan(annotation);
                if (referencedSpan != null) {
                    spans.add(referencedSpan);
                }
            }
            return spans;
        } catch (InvalidSpanException ise) {
            return new ArrayList<Span>();
        }
    }

    /**
     * Creates a comparator that orders annotations by the first span in their
     * span list (or a span borrowed from a related annotation when they have
     * none). Annotations for which no span can be found sort last.
     *
     * @param noSpansComparator
     *            used when neither annotation has a span and to break ties
     *            between equal spans; may be null
     */
    public Comparator<SimpleInstance> comparator(final Comparator<SimpleInstance> noSpansComparator) {
        return new Comparator<SimpleInstance>() {
            public int compare(SimpleInstance annotation1, SimpleInstance annotation2) {
                List<Span> spans1 = spansForOrdering(annotation1);
                List<Span> spans2 = spansForOrdering(annotation2);

                if (spans1.size() == 0 && spans2.size() == 0 && noSpansComparator != null) {
                    return noSpansComparator.compare(annotation1, annotation2);
                } else if (spans1.size() == 0) {
                    return 1;
                } else if (spans2.size() == 0) {
                    return -1;
                }

                int comparison = spans1.get(0).compareTo(spans2.get(0));
                if (comparison == 0 && noSpansComparator != null) {
                    return noSpansComparator.compare(annotation1, annotation2);
                }
                return comparison;
            }
        };
    }

    /**
     * Creates a comparator that orders annotations by the total size of their
     * spans, shortest first. Annotations without spans sort before all
     * others.
     * <p>
     * Annotations of equal length compare as 0, as the Comparator contract
     * requires; the previous implementation returned -1 for ties in both
     * directions, which sorting routines may reject. NOTE(review): if any
     * caller uses this comparator as the ordering of a sorted set or map,
     * equal-length annotations will now be treated as duplicates there -
     * confirm against callers.
     */
    public Comparator lengthComparator() {
        return new Comparator<SimpleInstance>() {
            public int compare(SimpleInstance annotation1, SimpleInstance annotation2) {
                List<Span> spans1 = annotationUtil.getSpans(annotation1);
                List<Span> spans2 = annotationUtil.getSpans(annotation2);
                boolean empty1 = spans1.size() == 0;
                boolean empty2 = spans2.size() == 0;
                if (empty1 || empty2) {
                    if (empty1 && empty2) {
                        return 0;
                    }
                    return empty1 ? -1 : 1;
                }

                int length1 = spansLength(spans1);
                int length2 = spansLength(spans2);
                if (length1 == length2) {
                    return 0;
                }
                return length1 < length2 ? -1 : 1;
            }
        };
    }

    /** @return the sum of the sizes of the given spans. */
    public int spansLength(List<Span> spans) {
        int length = 0;
        for (Span span : spans) {
            length += span.getSize();
        }
        return length;
    }

    /**
     * @return the smallest span (by the natural ordering of Span) among all
     *         spans of the annotations related to the given annotation, or
     *         null if those annotations have no spans.
     */
    public Span getAReferencedSpan(SimpleInstance annotation) {
        List<Span> candidates = new ArrayList<Span>();
        Set<SimpleInstance> referencedAnnotations = annotationUtil.getRelatedAnnotations(annotation);
        for (SimpleInstance referencedAnnotation : referencedAnnotations) {
            candidates.addAll(annotationUtil.getSpans(referencedAnnotation));
        }
        if (candidates.size() > 0) {
            return Collections.min(candidates);
        }
        return null;
    }

    /**
     *
     * @param annotation
     * @return the smallest span of the annotation by the natural ordering of
     *         Span (presumably the one with the smallest start index - confirm
     *         against Span.compareTo). If no span exists for the annotation,
     *         the result of getAReferencedSpan is returned instead, which may
     *         be null. The list returned by annotationUtil.getSpans is not
     *         modified.
     */
    public Span getFirstSpan(SimpleInstance annotation) {
        List<Span> spans = annotationUtil.getSpans(annotation);
        if (spans == null || spans.size() == 0) {
            return manager.getSpanUtil().getAReferencedSpan(annotation);
        }
        // min() picks the same element a stable sort would put first, without
        // reordering a list this class does not own.
        return Collections.min(spans);
    }
}