001 /*
002 * The contents of this file are subject to the Mozilla Public
003 * License Version 1.1 (the "License"); you may not use this file
004 * except in compliance with the License. You may obtain a copy of
005 * the License at http://www.mozilla.org/MPL/
006 *
007 * Software distributed under the License is distributed on an "AS
008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009 * implied. See the License for the specific language governing
010 * rights and limitations under the License.
011 *
012 * The Original Code is Knowtator.
013 *
014 * The Initial Developer of the Original Code is University of Colorado.
015 * Copyright (C) 2005 - 2008. All Rights Reserved.
016 *
017 * Knowtator was developed by the Center for Computational Pharmacology
018 * (http://compbio.uchcs.edu) at the University of Colorado Health
019 * Sciences Center School of Medicine with support from the National
020 * Library of Medicine.
021 *
022 * Current information about Knowtator can be obtained at
023 * http://knowtator.sourceforge.net/
024 *
025 * Contributor(s):
026 * Philip V. Ogren <philip@ogren.info> (Original Author)
027 */
028 package edu.uchsc.ccp.knowtator.util;
029
030 import java.awt.Component;
031 import java.util.Collection;
032 import java.util.HashSet;
033 import java.util.Set;
034
035 import javax.swing.JOptionPane;
036
037 import edu.stanford.smi.protege.model.Instance;
038 import edu.stanford.smi.protege.model.KnowledgeBase;
039 import edu.stanford.smi.protege.model.Project;
040 import edu.stanford.smi.protege.model.SimpleInstance;
041 import edu.stanford.smi.protege.ui.DisplayUtilities;
042 import edu.stanford.smi.protege.util.CollectionUtilities;
043 import edu.uchsc.ccp.knowtator.AnnotationUtil;
044 import edu.uchsc.ccp.knowtator.FilterUtil;
045 import edu.uchsc.ccp.knowtator.KnowtatorProjectUtil;
046 import edu.uchsc.ccp.knowtator.MentionUtil;
047 import edu.uchsc.ccp.knowtator.TextSourceUtil;
048
049 public class AnnotationCleanup {
050 public static void cleanup(Component parent, KnowledgeBase kb, KnowtatorProjectUtil kpu,
051 TextSourceUtil textSourceUtil, AnnotationUtil annotationUtil, MentionUtil mentionUtil,
052 FilterUtil filterUtil, Project project) {
053
054 int option = JOptionPane.showConfirmDialog(parent,
055 "This option allows you to delete large numbers of unwanted annotations.\n"
056 + "This is done by choosing an annotation filter that defines the set of \n"
057 + "annotations to keep and removing all others. Only annotations that pass\n"
058 + "through the annotation filter and belong to the selected text sources will\n"
059 + "be kept. All others will be deleted. Please archive your project prior to\n"
060 + "to using this option. Also, if you are unhappy with the results, simply close\n"
061 + "the project without saving the changes that have been made. ", "Remove annotations",
062 JOptionPane.OK_CANCEL_OPTION);
063 if (option != JOptionPane.OK_OPTION)
064 return;
065
066 SimpleInstance filter = (SimpleInstance) DisplayUtilities.pickInstance(parent, CollectionUtilities
067 .createCollection(kpu.getFilterCls()), "Select filter for annotations to be kept.");
068 if (filter == null)
069 return;
070
071 Collection<SimpleInstance> textSources = (Collection<SimpleInstance>) DisplayUtilities.pickInstances(parent,
072 kb, CollectionUtilities.createCollection(kpu.getTextSourceCls()),
073 "Select text sources that contain annotations you want to keep.");
074 if (textSources == null || textSources.size() == 0)
075 return;
076
077 option = JOptionPane.showConfirmDialog(parent, "Click OK to proceed with annotation removal.",
078 "Confirm annotation removal", JOptionPane.OK_CANCEL_OPTION);
079 if (option != JOptionPane.OK_OPTION)
080 return;
081
082 cleanup(filter, filterUtil, textSources, annotationUtil, mentionUtil, kb, kpu);
083
084 JOptionPane.showMessageDialog(parent, "Removal of annotations complete.\n"
085 + "To undo changes, please close the project\n" + "without saving it and re-open it.",
086 "Annotation removal complete", JOptionPane.INFORMATION_MESSAGE);
087 }
088
089 public static void cleanup(SimpleInstance filter, FilterUtil filterUtil, Collection<SimpleInstance> textSources,
090 AnnotationUtil annotationUtil, MentionUtil mentionUtil, KnowledgeBase kb, KnowtatorProjectUtil kpu)
091
092 {
093 Set<SimpleInstance> keepers = new HashSet<SimpleInstance>();
094
095 // First we go through the selected text sources and find all the
096 // "keepers" that have passed through the annotation filter.
097 // This will be all annotations and there corresponding mentions and the
098 // annotations that are connected via those
099 // mentions.
100 // Technically, an annotation that does not pass through the filter
101 // could be a keeper if it is a slot value of an annotation
102 // that does.
103 for (SimpleInstance textSource : textSources) {
104 Collection<SimpleInstance> annotations = annotationUtil.getAnnotations(textSource);
105 annotations = filterUtil.filterAnnotations(annotations, filter);
106 for (SimpleInstance annotation : annotations) {
107 keepers.add(annotation);
108 SimpleInstance mention = annotationUtil.getMention(annotation);
109 keepers.add(mention);
110 Set<SimpleInstance> connectedMentions = mentionUtil.getAllConnectedMentions(mention);
111 keepers.addAll(connectedMentions);
112 for (SimpleInstance connectedMention : connectedMentions) {
113 SimpleInstance connectedAnnotation = mentionUtil.getMentionAnnotation(connectedMention);
114 keepers.add(connectedAnnotation);
115 }
116 }
117 }
118
119 // Collect information about the annotation sets so that we can delete
120 // them now and reconstruct them later
121 // at the end of the method. This is done for performance reasons
122 // because large annotation sets can really
123 // slow down the process of deleting annotations because of the linear
124 // searching that occurs.
125 // Collection<SimpleInstance> annotationSets =
126 // (Collection<SimpleInstance>) kb.getInstances(kpu.getSetCls());
127 // Map<String, String> setDescriptions = new HashMap<String, String>();
128 // Map<String, Set<SimpleInstance>> setAnnotations = new HashMap<String,
129 // Set<SimpleInstance>>();
130 //
131 // for(SimpleInstance annotationSet : annotationSets)
132 // {
133 // String setName = (String)
134 // annotationSet.getOwnSlotValue(kpu.getSetNameSlot());
135 // String setDescription = (String)
136 // annotationSet.getOwnSlotValue(kpu.getSetDescriptionSlot());
137 // setDescriptions.put(setName, setDescription);
138 // Collection<SimpleInstance> annotations = (Collection<SimpleInstance>)
139 // annotationSet.getOwnSlotValues(kpu.getSetAnnotationSlot());
140 // setAnnotations.put(setName, new HashSet<SimpleInstance>());
141 // setAnnotations.get(setName).addAll(annotations);
142 // kb.deleteInstance(annotationSet);
143 // }
144
145 // delete all annotations that are not in 'keepers'
146 Collection<Instance> annotations = kb.getInstances(kpu.getAnnotationCls());
147 for (Instance annotation : annotations) {
148 if (!keepers.contains(annotation)) {
149 kb.deleteInstance(annotation);
150 }
151 }
152
153 // now delete all mentions not in 'keepers' too.
154 Collection<Instance> mentions = kb.getInstances(kpu.getMentionCls());
155 for (Instance mention : mentions) {
156 if (!keepers.contains(mention)) {
157 kb.deleteInstance(mention);
158 }
159 }
160
161 // we will now reconstruct the annotation sets with the annotations that
162 // remain.
163 // for(String setName : setDescriptions.keySet())
164 // {
165 // SimpleInstance setInstance = kb.createSimpleInstance(
166 // null,
167 // null,
168 // CollectionUtilities.createCollection(kpu.getSetCls()),
169 // true);
170 // setInstance.setOwnSlotValue(kpu.getSetNameSlot(), setName);
171 // setInstance.setOwnSlotValue(kpu.getSetDescriptionSlot(),
172 // setDescriptions.get(setName));
173 //
174 // Set<SimpleInstance> setAnns = setAnnotations.get(setName);
175 // Set<SimpleInstance> existingAnns = new HashSet<SimpleInstance>();
176 // for(SimpleInstance setAnn : setAnns)
177 // {
178 // if(!setAnn.isDeleted())
179 // existingAnns.add(setAnn);
180 // }
181 // setInstance.setOwnSlotValues(kpu.getSetAnnotationSlot(),
182 // existingAnns);
183 // }
184
185 }
186
187 }