001    /*
002     * The contents of this file are subject to the Mozilla Public
003     * License Version 1.1 (the "License"); you may not use this file
004     * except in compliance with the License. You may obtain a copy of
005     * the License at http://www.mozilla.org/MPL/
006     *
007     * Software distributed under the License is distributed on an "AS
008     * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009     * implied. See the License for the specific language governing
010     * rights and limitations under the License.
011     *
012     * The Original Code is Knowtator.
013     *
014     * The Initial Developer of the Original Code is University of Colorado.  
015     * Copyright (C) 2005 - 2008.  All Rights Reserved.
016     *
017     * Knowtator was developed by the Center for Computational Pharmacology
018     * (http://compbio.uchcs.edu) at the University of Colorado Health 
019     *  Sciences Center School of Medicine with support from the National 
020     *  Library of Medicine.  
021     *
022     * Current information about Knowtator can be obtained at 
023     * http://knowtator.sourceforge.net/
024     *
025     * Contributor(s):
026     *   Philip V. Ogren <philip@ogren.info> (Original Author)
027     */
028    
029    package edu.uchsc.ccp.knowtator;
030    
031    import java.util.ArrayList;
032    import java.util.Collections;
033    import java.util.Comparator;
034    import java.util.List;
035    import java.util.Set;
036    import java.util.regex.Matcher;
037    import java.util.regex.Pattern;
038    
039    import edu.stanford.smi.protege.model.SimpleInstance;
040    import edu.uchsc.ccp.knowtator.event.EventHandler;
041    import edu.uchsc.ccp.knowtator.textsource.TextSource;
042    import edu.uchsc.ccp.knowtator.textsource.TextSourceAccessException;
043    import edu.uchsc.ccp.knowtator.textsource.TextSourceChangeEvent;
044    import edu.uchsc.ccp.knowtator.textsource.TextSourceChangeListener;
045    
046    public class SpanUtil implements TextSourceChangeListener {
047            public static final String GROW_ANNOTATION_LEFT = "GROW_ANNOTATION_LEFT";
048    
049            public static final String GROW_ANNOTATION_LEFT_WORD = "GROW_ANNOTATION_LEFT_WORD";
050    
051            public static final String GROW_ANNOTATION_RIGHT = "GROW_ANNOTATION_RIGHT";
052    
053            public static final String GROW_ANNOTATION_RIGHT_WORD = "GROW_ANNOTATION_RIGHT_WORD";
054    
055            public static final String SHRINK_ANNOTATION_LEFT = "SHRINK_ANNOTATION_LEFT";
056    
057            public static final String SHRINK_ANNOTATION_LEFT_WORD = "SHRINK_ANNOTATION_LEFT_WORD";
058    
059            public static final String SHRINK_ANNOTATION_RIGHT = "SHRINK_ANNOTATION_RIGHT";
060    
061            public static final String SHRINK_ANNOTATION_RIGHT_WORD = "SHRINK_ANNOTATION_RIGHT_WORD";
062    
063            TextSource textSource;
064    
065            KnowtatorManager manager;
066    
067            KnowtatorProjectUtil kpu;
068    
069            AnnotationUtil annotationUtil;
070    
071            /** Creates a new instance of SpanUtil */
072            public SpanUtil(KnowtatorManager manager) {
073                    this.manager = manager;
074                    this.kpu = manager.getKnowtatorProjectUtil();
075                    this.annotationUtil = manager.getAnnotationUtil();
076            }
077    
078            public void textSourceChanged(TextSourceChangeEvent event) {
079                    textSource = event.getTextSource();
080            }
081    
082            public boolean canGrowSpanRight(Span span) {
083                    try {
084                            if (Span.isValid(span.getStart(), span.getEnd() + 1)) {
085                                    if (span.getEnd() + 1 <= textSource.getText().length()) {
086                                            return true;
087                                    }
088                            }
089                            return false;
090                    } catch (Exception exception) {
091                            return false;
092                    }
093            }
094    
095            public boolean canShrinkSpanRight(Span span) {
096                    return Span.isValid(span.getStart(), span.getEnd() - 1);
097            }
098    
099            public boolean canGrowSpanLeft(Span span) {
100                    return Span.isValid(span.getStart() - 1, span.getEnd());
101            }
102    
103            public boolean canShrinkSpanLeft(Span span) {
104                    return Span.isValid(span.getStart() + 1, span.getEnd());
105            }
106    
107            private void growSpanRight(Span span, SimpleInstance annotation) throws InvalidSpanException,
108                            TextSourceAccessException {
109                    if (canGrowSpanRight(span))
110                            editSpan(span, new Span(span.getStart(), span.getEnd() + 1), annotation);
111            }
112    
113            private void growSpanRightWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
114                            TextSourceAccessException {
115                    String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
116                    Span superspan = expandToSplits(text, new Span(span.getStart(), span.getEnd() + 2), 30, 30, Pattern
117                                    .compile(manager.getTokenRegex()));
118                    if (superspan != null) {
119                            if (superspan.getEnd() == span.getEnd())
120                                    growSpanRight(span, annotation);
121                            else if (superspan.getEnd() > span.getEnd()) {
122                                    editSpan(span, new Span(span.getStart(), superspan.getEnd()), annotation);
123                            }
124                    }
125            }
126    
127            private void shrinkSpanRight(Span span, SimpleInstance annotation) throws InvalidSpanException,
128                            TextSourceAccessException {
129                    if (canShrinkSpanRight(span))
130                            editSpan(span, new Span(span.getStart(), span.getEnd() - 1), annotation);
131            }
132    
133            private void shrinkSpanRightWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
134                            TextSourceAccessException {
135                    String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
136                    Span subspan = shrinkRight(text, span, Pattern.compile(manager.getTokenRegex()));
137                    if (subspan != null) {
138                            if (subspan.getEnd() == span.getEnd())
139                                    shrinkSpanRight(span, annotation);
140                            else if (subspan.getEnd() < span.getEnd()) {
141                                    editSpan(span, subspan, annotation);
142                            }
143                    }
144            }
145    
146            private void shrinkSpanLeftWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
147                            TextSourceAccessException {
148                    String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
149                    Span subspan = shrinkLeft(text, span, Pattern.compile(manager.getTokenRegex()));
150                    if (subspan != null) {
151                            if (subspan.getStart() == span.getStart())
152                                    shrinkSpanLeft(span, annotation);
153                            else if (subspan.getStart() > span.getStart()) {
154                                    editSpan(span, subspan, annotation);
155                            }
156                    }
157            }
158    
159            private void growSpanLeft(Span span, SimpleInstance annotation) throws InvalidSpanException,
160                            TextSourceAccessException {
161                    if (canGrowSpanLeft(span))
162                            editSpan(span, new Span(span.getStart() - 1, span.getEnd()), annotation);
163            }
164    
165            private void growSpanLeftWord(Span span, SimpleInstance annotation) throws InvalidSpanException,
166                            TextSourceAccessException {
167                    if (span.getStart() == 0)
168                            return;
169                    String text = manager.getTextSourceUtil().getCurrentTextSource().getText();
170    
171                    Span superspan = expandToSplits(text, new Span(span.getStart() - 2, span.getEnd()), 30, 30, Pattern
172                                    .compile(manager.getTokenRegex()));
173                    if (superspan != null) {
174                            if (superspan.getStart() == span.getStart())
175                                    growSpanLeft(span, annotation);
176                            else if (superspan.getStart() < span.getStart() && superspan.getStart() >= 0) {
177                                    editSpan(span, new Span(superspan.getStart(), span.getEnd()), annotation);
178                            }
179                    }
180            }
181    
182            private void shrinkSpanLeft(Span span, SimpleInstance annotation) throws InvalidSpanException,
183                            TextSourceAccessException {
184                    if (canShrinkSpanLeft(span))
185                            editSpan(span, new Span(span.getStart() + 1, span.getEnd()), annotation);
186            }
187    
188            /**
189             * The strategy for updating a span is to get the Span objects for the
190             * annotationInstance loop through them and find the one that matches
191             * 'oldSpan' and update that span with the 'newSpan' in the list of spans
192             * returned from annotationUtil.getSpans. The list of spans are then sent
193             * back to annotationUtil.setSpans.
194             */
195    
196            private void editSpan(Span oldSpan, Span newSpan, SimpleInstance annotation) throws InvalidSpanException,
197                            TextSourceAccessException {
198                    List<Span> spans = new ArrayList<Span>(annotationUtil.getSpans(annotation));
199                    if (spans.size() == 0) {
200                            System.out.println("empty span list");
201                    }
202                    for (int i = 0; i < spans.size(); i++) {
203                            Span annotationSpan = spans.get(i);
204                            if (annotationSpan.equals(oldSpan)) {
205                                    spans.set(i, newSpan);
206                                    if (newSpan.getStart() == newSpan.getEnd())
207                                            spans.remove(i);
208                                    break;
209                            }
210                    }
211                    annotationUtil.setSpans(annotation, spans, null);
212                    manager.refreshAnnotationsDisplay(true);
213            }
214    
215            public void editSpans(List<Span> spans, SimpleInstance annotation, String editType) throws InvalidSpanException,
216                            TextSourceAccessException {
217                    for (Span span : spans) {
218                            if (editType.equals(GROW_ANNOTATION_LEFT))
219                                    growSpanLeft(span, annotation);
220                            if (editType.equals(GROW_ANNOTATION_LEFT_WORD))
221                                    growSpanLeftWord(span, annotation);
222                            else if (editType.equals(GROW_ANNOTATION_RIGHT))
223                                    growSpanRight(span, annotation);
224                            else if (editType.equals(GROW_ANNOTATION_RIGHT_WORD))
225                                    growSpanRightWord(span, annotation);
226                            else if (editType.equals(SHRINK_ANNOTATION_LEFT))
227                                    shrinkSpanLeft(span, annotation);
228                            else if (editType.equals(SHRINK_ANNOTATION_LEFT_WORD))
229                                    shrinkSpanLeftWord(span, annotation);
230                            else if (editType.equals(SHRINK_ANNOTATION_RIGHT))
231                                    shrinkSpanRight(span, annotation);
232                            else if (editType.equals(SHRINK_ANNOTATION_RIGHT_WORD))
233                                    shrinkSpanRightWord(span, annotation);
234                    }
235                    EventHandler.getInstance().fireSpanEditted(annotation);
236    
237            }
238    
239            /**
240             * The purpose of this method is to "expand" the substringSpan such that the
241             * start and end of the returned span are at splits in the string.
242             * 
243             * @param superString
244             *            the string that is being looked at
245             * @param substringSpan
246             *            a span that designates some arbitrary substring of the
247             *            superString
248             * @param frontWindowSize
249             *            the string that immediately precedes the substring is
250             *            considered the frontwindow. This parameter sets the maximum
251             *            size of the frontwindow (it may be smaller if the superstring
252             *            doesn't have enough text that precedes the substring.)
253             * @param rearWindowSize
254             *            the string that immediately follows the substring is
255             *            considered the rearwindow. This parameter sets the maximum
256             *            size of the rearwindow (it may be smaller if the superstring
257             *            doesn't have enough text that follows the substring.)
258             * @param splitPattern
259             *            a regular expression that defines a "split" or a word
260             *            boundary. For example, you might pass in Pattern.compile(\\W+)
261             *            to define a boundary as non-word characters.
262             * @return a span that is "expanded" to start and end at word boundaries.
263             *         See the associated unit tests for examples. Typically the
264             *         substring associated with the returned span will be larger than
265             *         the substring associated with the substringSpan parameter.
266             *         However, if the substring associated with the substringSpan
267             *         begins or ends with the splitPattern, then the returned substring
268             *         may be shorter.
269             * 
270             */
271            public static Span expandToSplits(String superString, Span substringSpan, int frontWindowSize, int rearWindowSize,
272                            Pattern splitPattern) {
273                    try {
274                            if (!(new Span(0, superString.length()).contains(substringSpan))) {
275                                    return null;
276                            }
277    
278                            String substring = superString.substring(substringSpan.getStart(), substringSpan.getEnd());
279    
280                            // First we will determine the start of the returned span
281                            // If the substring starts with the splitPattern then we will move
282                            // the start
283                            // to the "right". Otherwise we will look for the last splitPattern
284                            // in the
285                            // front window.
286                            int returnSpanStart = substringSpan.getStart();
287    
288                            // shrink the span start if substring begins with splitPattern
289                            boolean shrinkSpanStart = false;
290                            Matcher matcher = splitPattern.matcher(substring);
291                            if (matcher.find()) {
292                                    if (matcher.start() == 0) {
293                                            returnSpanStart += matcher.end();
294                                            shrinkSpanStart = true;
295                                    }
296                            }
297    
298                            // if substring does not begin with splitPattern, then find last
299                            // splitPattern
300                            // in frontWindow
301                            if (!shrinkSpanStart) {
302                                    // The front window starts at either 0 or frontWindowSize from
303                                    // the substringSpan start
304                                    int frontWindowStart = Math.max(0, substringSpan.getStart() - frontWindowSize);
305                                    String frontWindow = superString.substring(frontWindowStart, substringSpan.getStart());
306                                    matcher = splitPattern.matcher(frontWindow);
307                                    int lastMatch = -1;
308                                    if (frontWindowStart == 0) {
309                                            lastMatch = 0;
310                                    }
311                                    while (matcher.find()) {
312                                            lastMatch = matcher.end();
313                                    }
314    
315                                    if (lastMatch > -1) {
316                                            returnSpanStart -= frontWindow.substring(lastMatch).length();
317                                    }
318                            }
319    
320                            // now we figure out the end of the returned span
321                            // If the substring ends with the splitPattern then we will move the
322                            // end
323                            // to the "left". Otherwise we will look for the last splitPattern
324                            // in the
325                            // front window.
326                            int returnSpanEnd = substringSpan.getEnd();
327    
328                            // shrink the span end if substring ends with splitPattern
329                            boolean shrinkSpanEnd = false;
330                            StringBuffer reverseStringBuffer = new StringBuffer(substring).reverse();
331                            String reverseSubstring = reverseStringBuffer.toString();
332    
333                            matcher = splitPattern.matcher(reverseSubstring);
334                            if (matcher.find()) {
335                                    if (matcher.start() == 0) {
336                                            returnSpanEnd -= matcher.end();
337                                            shrinkSpanEnd = true;
338                                    }
339                            }
340    
341                            if (!shrinkSpanEnd) {
342                                    int rearWindowEnd = Math.min(superString.length(), substringSpan.getEnd() + rearWindowSize);
343                                    String rearWindow = superString.substring(substringSpan.getEnd(), rearWindowEnd);
344                                    matcher = splitPattern.matcher(rearWindow);
345                                    if (matcher.find()) {
346                                            if (matcher.start() != 0) {
347                                                    returnSpanEnd = returnSpanEnd + matcher.start();
348                                            }
349                                    } else if (rearWindowEnd == superString.length()) {
350                                            returnSpanEnd = rearWindowEnd;
351                                    }
352    
353                            }
354                            return new Span(returnSpanStart, returnSpanEnd);
355                    } catch (InvalidSpanException ise) {
356                            return substringSpan;
357                    }
358    
359            }
360    
361            public static Span shrinkRight(String superString, Span span, Pattern splitPattern) {
362                    if (!(new Span(0, superString.length()).contains(span))) {
363                            return null;
364                    }
365    
366                    String spannedString = Span.substring(superString, span);
367                    Matcher matcher = splitPattern.matcher(spannedString);
368                    int lastIndex = -1;
369                    while (matcher.find()) {
370                            lastIndex = matcher.start();
371                    }
372    
373                    if (lastIndex == -1)
374                            return span;
375    
376                    lastIndex = span.getStart() + lastIndex;
377                    if (lastIndex > span.getStart() && lastIndex < span.getEnd())
378                            return new Span(span.getStart(), lastIndex);
379    
380                    return span;
381            }
382    
383            public static Span shrinkLeft(String superString, Span span, Pattern splitPattern) {
384                    if (!(new Span(0, superString.length()).contains(span))) {
385                            return null;
386                    }
387    
388                    String spannedString = Span.substring(superString, span);
389                    Matcher matcher = splitPattern.matcher(spannedString);
390                    int lastIndex = -1;
391                    if (matcher.find()) {
392                            lastIndex = matcher.end();
393                    }
394    
395                    if (lastIndex == -1)
396                            return span;
397    
398                    lastIndex = span.getStart() + lastIndex;
399                    if (lastIndex > span.getStart() && lastIndex < span.getEnd())
400                            return new Span(lastIndex, span.getEnd());
401    
402                    return span;
403            }
404    
405            public Comparator<SimpleInstance> comparator(final Comparator<SimpleInstance> noSpansComparator) {
406                    return new Comparator<SimpleInstance>() {
407                            public int compare(SimpleInstance annotation1, SimpleInstance annotation2) {
408                                    List<Span> spans1;
409                                    try {
410                                            spans1 = new ArrayList<Span>(annotationUtil.getSpans(annotation1));
411                                            if (spans1.size() == 0) {
412                                                    Span referencedSpan = getAReferencedSpan(annotation1);
413                                                    if (referencedSpan != null)
414                                                            spans1.add(referencedSpan);
415                                            }
416                                    } catch (InvalidSpanException ise) {
417                                            spans1 = new ArrayList<Span>();
418                                    }
419                                    List<Span> spans2;
420                                    try {
421                                            spans2 = new ArrayList<Span>(annotationUtil.getSpans(annotation2));
422                                            if (spans2.size() == 0) {
423                                                    Span referencedSpan = getAReferencedSpan(annotation2);
424                                                    if (referencedSpan != null)
425                                                            spans2.add(referencedSpan);
426                                            }
427    
428                                    } catch (InvalidSpanException ise) {
429                                            spans2 = new ArrayList<Span>();
430                                    }
431                                    if (spans1.size() == 0 && spans2.size() == 0 && noSpansComparator != null) {
432                                            return noSpansComparator.compare(annotation1, annotation2);
433                                    } else if (spans1.size() == 0) {
434                                            return 1;
435                                    } else if (spans2.size() == 0) {
436                                            return -1;
437                                    } else {
438                                            int comparison = spans1.get(0).compareTo(spans2.get(0));
439                                            if (comparison == 0 && noSpansComparator != null) {
440                                                    return noSpansComparator.compare(annotation1, annotation2);
441                                            } else
442                                                    return comparison;
443                                    }
444                            }
445                    };
446            }
447    
448            public Comparator lengthComparator() {
449                    return new Comparator<SimpleInstance>() {
450                            public int compare(SimpleInstance annotation1, SimpleInstance annotation2) {
451                                    List<Span> spans1 = annotationUtil.getSpans(annotation1);
452                                    List<Span> spans2 = annotationUtil.getSpans(annotation2);
453                                    if (spans1.size() == 0) {
454                                            return -1;
455                                    } else if (spans2.size() == 0) {
456                                            return 1;
457                                    } else {
458                                            int length1 = spansLength(spans1);
459                                            int length2 = spansLength(spans2);
460    
461                                            if (length1 <= length2)
462                                                    return -1;
463                                            else
464                                                    return 1;
465                                    }
466                            }
467                    };
468            }
469    
470            public int spansLength(List<Span> spans) {
471                    int length = 0;
472                    for (Span span : spans) {
473                            length += span.getSize();
474                    }
475                    return length;
476            }
477    
478            public Span getAReferencedSpan(SimpleInstance annotation) {
479                    List<Span> returnValues = new ArrayList<Span>();
480                    Set<SimpleInstance> referencedAnnotations = annotationUtil.getRelatedAnnotations(annotation);
481                    for (SimpleInstance referencedAnnotation : referencedAnnotations) {
482                            List<Span> spans = annotationUtil.getSpans(referencedAnnotation);
483                            returnValues.addAll(spans);
484    
485                    }
486                    if (returnValues.size() > 0) {
487                            Collections.sort(returnValues);
488                            return returnValues.get(0);
489                    }
490                    return null;
491            }
492    
493            /**
494             * 
495             * @param annotation
496             * @return the span of the annotation that has the smallest start index. If
497             *         no span exists for the annotation, then it will return the span
498             *         with the smallest start index from all of the spans of all of the
499             *         annotations (recursively) related annotations
500             */
501            public Span getFirstSpan(SimpleInstance annotation) {
502                    List<Span> spans1 = annotationUtil.getSpans(annotation);
503                    if (spans1 == null || spans1.size() == 0) {
504                            return manager.getSpanUtil().getAReferencedSpan(annotation);
505                    } else {
506                            Collections.sort(spans1);
507                            return spans1.get(0);
508                    }
509            }
510    }