001 /* 002 * The contents of this file are subject to the Mozilla Public 003 * License Version 1.1 (the "License"); you may not use this file 004 * except in compliance with the License. You may obtain a copy of 005 * the License at http://www.mozilla.org/MPL/ 006 * 007 * Software distributed under the License is distributed on an "AS 008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 009 * implied. See the License for the specific language governing 010 * rights and limitations under the License. 011 * 012 * The Original Code is Knowtator. 013 * 014 * The Initial Developer of the Original Code is University of Colorado. 015 * Copyright (C) 2005 - 2008. All Rights Reserved. 016 * 017 * Knowtator was developed by the Center for Computational Pharmacology 018 * (http://compbio.uchcs.edu) at the University of Colorado Health 019 * Sciences Center School of Medicine with support from the National 020 * Library of Medicine. 021 * 022 * Current information about Knowtator can be obtained at 023 * http://knowtator.sourceforge.net/ 024 * 025 * Contributor(s): 026 * Philip V. 
Ogren <philip@ogren.info> (Original Author)
 */
package edu.uchsc.ccp.knowtator.util;

import java.awt.Component;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import javax.swing.JOptionPane;

import edu.stanford.smi.protege.model.Instance;
import edu.stanford.smi.protege.model.KnowledgeBase;
import edu.stanford.smi.protege.model.Project;
import edu.stanford.smi.protege.model.SimpleInstance;
import edu.stanford.smi.protege.ui.DisplayUtilities;
import edu.stanford.smi.protege.util.CollectionUtilities;
import edu.uchsc.ccp.knowtator.AnnotationUtil;
import edu.uchsc.ccp.knowtator.FilterUtil;
import edu.uchsc.ccp.knowtator.KnowtatorProjectUtil;
import edu.uchsc.ccp.knowtator.MentionUtil;
import edu.uchsc.ccp.knowtator.TextSourceUtil;

/**
 * Bulk removal of unwanted annotations from a knowledge base.
 * <p>
 * The user (or caller) picks an annotation filter and a set of text sources.
 * Annotations that pass the filter for those text sources are kept, together
 * with their mentions, every mention connected to those mentions, and the
 * annotations of those connected mentions. Every other instance of the
 * annotation class and of the mention class is deleted.
 */
public class AnnotationCleanup {

    /** Explanatory text shown before the user is asked to pick a filter. */
    private static final String INTRO_MESSAGE =
            "This option allows you to delete large numbers of unwanted annotations.\n"
                    + "This is done by choosing an annotation filter that defines the set of \n"
                    + "annotations to keep and removing all others. Only annotations that pass\n"
                    + "through the annotation filter and belong to the selected text sources will\n"
                    + "be kept. All others will be deleted. Please archive your project prior to\n"
                    + "using this option. Also, if you are unhappy with the results, simply close\n"
                    + "the project without saving the changes that have been made. ";

    /**
     * Interactive entry point: walks the user through choosing a filter and
     * text sources, asks for confirmation, performs the removal and reports
     * completion.
     * <p>
     * The method returns without deleting anything if the user cancels either
     * confirmation dialog, picks no filter, or picks no text sources.
     *
     * @param parent         parent component for all dialogs
     * @param kb             knowledge base the instances are deleted from
     * @param kpu            supplies the filter, text source, annotation and mention classes
     * @param textSourceUtil not used by this method
     * @param annotationUtil used to look up annotations and their mentions
     * @param mentionUtil    used to follow connections between mentions
     * @param filterUtil     applies the chosen filter to candidate annotations
     * @param project        not used by this method
     */
    public static void cleanup(Component parent, KnowledgeBase kb, KnowtatorProjectUtil kpu,
            TextSourceUtil textSourceUtil, AnnotationUtil annotationUtil, MentionUtil mentionUtil,
            FilterUtil filterUtil, Project project) {

        if (!confirm(parent, INTRO_MESSAGE, "Remove annotations")) {
            return;
        }

        SimpleInstance filter = (SimpleInstance) DisplayUtilities.pickInstance(parent,
                CollectionUtilities.createCollection(kpu.getFilterCls()),
                "Select filter for annotations to be kept.");
        if (filter == null) {
            return;
        }

        // The picker is restricted to the text source class, so the elements
        // are expected to be SimpleInstances; the cast itself cannot be checked.
        @SuppressWarnings("unchecked")
        Collection<SimpleInstance> textSources = (Collection<SimpleInstance>) DisplayUtilities.pickInstances(
                parent, kb, CollectionUtilities.createCollection(kpu.getTextSourceCls()),
                "Select text sources that contain annotations you want to keep.");
        if (textSources == null || textSources.isEmpty()) {
            return;
        }

        // Last chance to back out before anything is deleted.
        if (!confirm(parent, "Click OK to proceed with annotation removal.", "Confirm annotation removal")) {
            return;
        }

        cleanup(filter, filterUtil, textSources, annotationUtil, mentionUtil, kb, kpu);

        JOptionPane.showMessageDialog(parent, "Removal of annotations complete.\n"
                + "To undo changes, please close the project\n" + "without saving it and re-open it.",
                "Annotation removal complete", JOptionPane.INFORMATION_MESSAGE);
    }

    /**
     * Non-interactive removal: deletes every annotation and mention in
     * {@code kb} that is not reachable from an annotation which passes
     * {@code filter} in one of {@code textSources}.
     * <p>
     * Annotations are deleted first, then mentions.
     * <p>
     * An earlier version of this method also deleted all annotation sets
     * before the bulk delete (after recording each set's name, description
     * and member annotations) and recreated them afterwards with the
     * surviving annotations, as a speed-up for large sets. That step is
     * disabled; annotation set instances are not touched by this method.
     *
     * @param filter         filter instance selecting the annotations to keep
     * @param filterUtil     applies {@code filter} to candidate annotations
     * @param textSources    text sources whose filtered annotations are kept
     * @param annotationUtil used to look up annotations and their mentions
     * @param mentionUtil    used to follow connections between mentions
     * @param kb             knowledge base the instances are deleted from
     * @param kpu            supplies the annotation and mention classes
     */
    public static void cleanup(SimpleInstance filter, FilterUtil filterUtil, Collection<SimpleInstance> textSources,
            AnnotationUtil annotationUtil, MentionUtil mentionUtil, KnowledgeBase kb, KnowtatorProjectUtil kpu) {

        Set<SimpleInstance> keepers = collectKeepers(filter, filterUtil, textSources, annotationUtil, mentionUtil);

        // Delete all annotations that are not in 'keepers' ...
        Collection<Instance> annotations = kb.getInstances(kpu.getAnnotationCls());
        deleteAllExcept(kb, annotations, keepers);

        // ... and then all mentions that are not in 'keepers' either.
        Collection<Instance> mentions = kb.getInstances(kpu.getMentionCls());
        deleteAllExcept(kb, mentions, keepers);
    }

    /** Shows an OK/Cancel dialog and reports whether the user chose OK. */
    private static boolean confirm(Component parent, String message, String title) {
        int option = JOptionPane.showConfirmDialog(parent, message, title, JOptionPane.OK_CANCEL_OPTION);
        return option == JOptionPane.OK_OPTION;
    }

    /**
     * Builds the set of annotations and mentions that must survive the cleanup.
     * <p>
     * For every selected text source, the annotations that pass the filter
     * are kept along with their mentions, all mentions connected to those
     * mentions, and the annotations of the connected mentions. As a result an
     * annotation that does not itself pass the filter can still be kept when
     * it is connected to one that does.
     */
    private static Set<SimpleInstance> collectKeepers(SimpleInstance filter, FilterUtil filterUtil,
            Collection<SimpleInstance> textSources, AnnotationUtil annotationUtil, MentionUtil mentionUtil) {
        Set<SimpleInstance> keepers = new HashSet<SimpleInstance>();
        for (SimpleInstance textSource : textSources) {
            Collection<SimpleInstance> annotations = annotationUtil.getAnnotations(textSource);
            annotations = filterUtil.filterAnnotations(annotations, filter);
            for (SimpleInstance annotation : annotations) {
                keepers.add(annotation);

                SimpleInstance mention = annotationUtil.getMention(annotation);
                if (mention == null) {
                    // NOTE(review): presumably an annotation can lack a mention;
                    // skip the connection walk rather than hand null to MentionUtil.
                    continue;
                }
                keepers.add(mention);

                Set<SimpleInstance> connectedMentions = mentionUtil.getAllConnectedMentions(mention);
                keepers.addAll(connectedMentions);
                for (SimpleInstance connectedMention : connectedMentions) {
                    SimpleInstance connectedAnnotation = mentionUtil.getMentionAnnotation(connectedMention);
                    if (connectedAnnotation != null) {
                        keepers.add(connectedAnnotation);
                    }
                }
            }
        }
        return keepers;
    }

    /**
     * Deletes from {@code kb} every candidate that is not contained in
     * {@code keepers}.
     * <p>
     * NOTE(review): the candidates are deleted while being iterated; this
     * assumes the collection handed in is a snapshot rather than a live view
     * of the knowledge base -- confirm against the Protege API.
     */
    private static void deleteAllExcept(KnowledgeBase kb, Collection<Instance> candidates,
            Set<SimpleInstance> keepers) {
        for (Instance candidate : candidates) {
            if (!keepers.contains(candidate)) {
                kb.deleteInstance(candidate);
            }
        }
    }

}