/*
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Knowtator.
 *
 * The Initial Developer of the Original Code is University of Colorado.
 * Copyright (C) 2005 - 2008. All Rights Reserved.
 *
 * Knowtator was developed by the Center for Computational Pharmacology
 * (http://compbio.uchcs.edu) at the University of Colorado Health
 * Sciences Center School of Medicine with support from the National
 * Library of Medicine.
 *
 * Current information about Knowtator can be obtained at
 * http://knowtator.sourceforge.net/
 *
 * Contributor(s):
 *   Philip V. Ogren <philip@ogren.info> (Original Author)
 */

package edu.uchsc.ccp.knowtator.xml;

import java.awt.Component;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import javax.swing.JFileChooser;
import javax.swing.JOptionPane;

import org.jdom.Document;
import org.jdom.Element;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;

import edu.stanford.smi.protege.model.Cls;
import edu.stanford.smi.protege.model.Instance;
import edu.stanford.smi.protege.model.KnowledgeBase;
import edu.stanford.smi.protege.model.Project;
import edu.stanford.smi.protege.model.SimpleInstance;
import edu.stanford.smi.protege.model.Slot;
import edu.stanford.smi.protege.ui.DisplayUtilities;
import edu.stanford.smi.protege.util.CollectionUtilities;
import edu.uchsc.ccp.knowtator.AnnotationUtil;
import edu.uchsc.ccp.knowtator.FilterUtil;
import edu.uchsc.ccp.knowtator.InvalidSpanException;
import edu.uchsc.ccp.knowtator.KnowtatorProjectUtil;
import edu.uchsc.ccp.knowtator.MentionUtil;
import edu.uchsc.ccp.knowtator.Span;
import edu.uchsc.ccp.knowtator.TextSourceUtil;

/**
 * Exports Knowtator annotations to XML. One file named
 * <code>&lt;text source name&gt;.knowtator.xml</code> is written per text
 * source; it contains the annotations of that text source followed by the
 * mentions (class, instance and slot mentions) reachable from them.
 */
public class XMLExport {
    /**
     * Key under which the path of the most recently chosen export directory
     * is stored in the project's client information.
     */
    public static final String XML_EXPORT_DIRECTORY = "XML_EXPORT_FILE";

    /**
     * Looks up the export directory remembered in the project's client
     * information.
     *
     * @param project
     *            the project whose client information is consulted
     * @return the remembered directory, or <code>null</code> if no path is
     *         stored or the stored path is not an existing directory
     */
    private static File getRecentXMLExportDirectory(Project project) {
        String storedPath = (String) project.getClientInformation(XML_EXPORT_DIRECTORY);
        if (storedPath == null) {
            return null;
        }

        File candidate = new File(storedPath);
        return (candidate.exists() && candidate.isDirectory()) ? candidate : null;
    }

    /**
     * Interactive export. Walks the user through a confirmation dialog, a
     * directory chooser, a filter picker and a text source picker, and then
     * writes one XML file per selected text source into the chosen
     * directory. Cancelling any dialog aborts the export silently.
     * <p>
     * A failure while exporting one text source is reported in an error
     * dialog and does not stop the export of the remaining text sources. The
     * closing "XML export complete" dialog is shown regardless of whether
     * individual text sources failed.
     *
     * @param parent
     *            parent component for all dialogs
     * @param kb
     *            knowledge base handed to the text source picker
     * @param kpu
     *            supplies the filter class and the text source class offered
     *            in the pickers
     * @param textSourceUtil
     *            not used by this method
     * @param annotationUtil
     *            used to read the annotations and their properties
     * @param mentionUtil
     *            used to read the mentions
     * @param filterUtil
     *            used to apply the selected filter
     * @param project
     *            project whose client information remembers the chosen
     *            export directory
     */
    public static void writeToXML(Component parent, KnowledgeBase kb, KnowtatorProjectUtil kpu,
            TextSourceUtil textSourceUtil, AnnotationUtil annotationUtil, MentionUtil mentionUtil,
            FilterUtil filterUtil, Project project) {

        int option = JOptionPane.showConfirmDialog(parent, "The following dialogs allow you to export \n"
                + "a set of Knowtator annotations to XML.", "XML Export", JOptionPane.OK_CANCEL_OPTION);
        if (option != JOptionPane.OK_OPTION) {
            return;
        }

        JFileChooser chooser = new JFileChooser();
        chooser.setDialogTitle("Please choose a directory to write xml files to.");
        chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);

        File recentXMLExportDirectory = getRecentXMLExportDirectory(project);
        if (recentXMLExportDirectory != null) {
            // The chooser opens on the parent of the remembered directory
            // (presumably so that the remembered directory itself is listed
            // and can be selected again).
            chooser.setCurrentDirectory(recentXMLExportDirectory.getParentFile());
        }

        int returnVal = chooser.showOpenDialog(parent);
        if (returnVal != JFileChooser.APPROVE_OPTION) {
            return;
        }

        File directory = chooser.getSelectedFile();
        project.setClientInformation(XML_EXPORT_DIRECTORY, directory.getPath());

        SimpleInstance filter = (SimpleInstance) DisplayUtilities.pickInstance(parent, CollectionUtilities
                .createCollection(kpu.getFilterCls()), "Select filter for annotations to be exported.");
        if (filter == null) {
            return;
        }

        Collection<?> textSources = DisplayUtilities.pickInstances(parent, kb, CollectionUtilities
                .createCollection(kpu.getTextSourceCls()),
                "Select text sources from which to choose annotations for export.");
        if (textSources == null || textSources.isEmpty()) {
            return;
        }

        for (Object selected : textSources) {
            SimpleInstance textSource = (SimpleInstance) selected;
            try {
                writeToXML(textSource, filter, directory, annotationUtil, filterUtil, mentionUtil);
            } catch (IOException ioe) {
                JOptionPane.showMessageDialog(parent, ioe, "Exception thrown while opening file",
                        JOptionPane.ERROR_MESSAGE);
            } catch (Exception e) {
                JOptionPane.showMessageDialog(parent, e, "Exception thrown while exporting annotations to XML",
                        JOptionPane.ERROR_MESSAGE);
            }
        }
        JOptionPane.showMessageDialog(parent, "XML export complete", "XML export complete",
                JOptionPane.INFORMATION_MESSAGE);
    }

    /**
     * Writes the filtered annotations of a single text source to
     * <code>&lt;outputDirectory&gt;/&lt;text source name&gt;.knowtator.xml</code>
     * using JDOM's pretty format.
     *
     * @param textSource
     *            the text source whose annotations are exported
     * @param filter
     *            the filter applied to the annotations before export
     * @param outputDirectory
     *            the directory the file is created in
     * @param annotationUtil
     *            used to read the annotations and their properties
     * @param filterUtil
     *            used to apply <code>filter</code>
     * @param mentionUtil
     *            used to read the mentions
     * @throws IOException
     *             if the output file cannot be opened, written or closed
     */
    public static void writeToXML(SimpleInstance textSource, SimpleInstance filter, File outputDirectory,
            AnnotationUtil annotationUtil, FilterUtil filterUtil, MentionUtil mentionUtil) throws IOException {
        Collection<SimpleInstance> annotations = new ArrayList<SimpleInstance>(annotationUtil
                .getAnnotations(textSource));
        annotations = filterUtil.filterAnnotations(annotations, filter);

        // Build the complete document before touching the file system so that
        // a failure while building does not leave an empty file behind.
        Document xmlDocument = new Document();
        buildXMLDocument(xmlDocument, annotations, textSource.getName(), annotationUtil, mentionUtil);

        XMLOutputter xmlOut = new XMLOutputter(Format.getPrettyFormat());
        File outputFile = new File(outputDirectory, textSource.getName() + ".knowtator.xml");
        FileOutputStream out = new FileOutputStream(outputFile);
        try {
            xmlOut.output(xmlDocument, out);
        } finally {
            // XMLOutputter does not close the stream it is given; release the
            // file handle here even when writing fails.
            out.close();
        }
    }

    /**
     * Populates <code>xmlDocument</code> with an annotations root element
     * that carries the text source name as an attribute, one child element
     * per annotation, and, after all annotation elements, the elements
     * describing every mention reachable from those annotations.
     *
     * @param xmlDocument
     *            the document that receives the root element
     * @param annotations
     *            the annotations to export
     * @param textSourceName
     *            value of the text source attribute on the root element
     * @param annotationUtil
     *            used to read the properties of each annotation
     * @param mentionUtil
     *            used to read the mentions
     */
    public static void buildXMLDocument(Document xmlDocument, Collection<SimpleInstance> annotations,
            String textSourceName, AnnotationUtil annotationUtil, MentionUtil mentionUtil) {
        Element annotationsElement = new Element(XMLConstants.ANNOTATIONS_ELEMENT_NAME);
        xmlDocument.addContent(annotationsElement);
        annotationsElement.setAttribute(XMLConstants.TEXT_SOURCE_ATTRIBUTE_NAME, textSourceName);

        // Mentions referenced by the annotations; written after all annotations.
        Set<SimpleInstance> mentions = new HashSet<SimpleInstance>();
        for (SimpleInstance annotation : annotations) {
            annotationsElement.addContent(buildAnnotationElement(annotation, mentions, annotationUtil));
        }

        // Shared across the recursion so that each mention is written once.
        Set<SimpleInstance> writtenMentions = new HashSet<SimpleInstance>();
        for (SimpleInstance mention : mentions) {
            buildMention(annotationsElement, mention, writtenMentions, mentionUtil);
        }
    }

    /**
     * Builds the element for one annotation. Children are added in this
     * order, each only when the corresponding value is present: mention
     * reference, annotator, spans, spanned text, comment, creation date.
     * The annotation's mention, if any, is also added to
     * <code>mentions</code>.
     */
    private static Element buildAnnotationElement(SimpleInstance annotation, Set<SimpleInstance> mentions,
            AnnotationUtil annotationUtil) {
        Element annotationElement = new Element(XMLConstants.ANNOTATION_ELEMENT_NAME);

        SimpleInstance mention = annotationUtil.getMention(annotation);
        if (mention != null) {
            mentions.add(mention);
            Element mentionElement = new Element(XMLConstants.MENTION_ELEMENT_NAME);
            mentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());
            annotationElement.addContent(mentionElement);
        }

        SimpleInstance annotator = annotationUtil.getAnnotator(annotation);
        if (annotator != null) {
            Element annotatorElement = new Element(XMLConstants.ANNOTATOR_ELEMENT_NAME);
            annotatorElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, annotator.getName());
            annotatorElement.addContent(annotator.getBrowserText());
            annotationElement.addContent(annotatorElement);
        }

        try {
            java.util.List<Span> spans = annotationUtil.getSpans(annotation);
            for (Span span : spans) {
                Element spanElement = new Element(XMLConstants.SPAN_ELEMENT_NAME);
                spanElement.setAttribute(XMLConstants.SPAN_START_ATTRIBUTE_NAME, "" + span.getStart());
                spanElement.setAttribute(XMLConstants.SPAN_END_ATTRIBUTE_NAME, "" + span.getEnd());
                annotationElement.addContent(spanElement);
            }
        } catch (InvalidSpanException ise) {
            // Best effort: the annotation is still exported, without span
            // elements. TODO: report this to the caller instead of only
            // printing the stack trace.
            ise.printStackTrace();
        }

        addTextChild(annotationElement, XMLConstants.SPANNED_TEXT_ELEMENT_NAME, annotationUtil
                .getText(annotation));
        addTextChild(annotationElement, XMLConstants.COMMENT_ELEMENT_NAME, annotationUtil
                .getComment(annotation));
        addTextChild(annotationElement, XMLConstants.CREATION_DATE_ELEMENT_NAME, annotationUtil
                .getCreationDate(annotation));

        return annotationElement;
    }

    /**
     * Appends a child element named <code>childName</code> holding
     * <code>text</code> to <code>parent</code>; does nothing when
     * <code>text</code> is <code>null</code>.
     */
    private static void addTextChild(Element parent, String childName, String text) {
        if (text == null) {
            return;
        }
        Element child = new Element(childName);
        child.addContent(text);
        parent.addContent(child);
    }

    /**
     * Writes the element for <code>mention</code>, and recursively for every
     * mention reachable from it, as children of
     * <code>annotationsElement</code>. A mention already contained in
     * <code>writtenMentions</code> is skipped; this is what terminates the
     * recursion when mentions reference each other. A mention that is
     * neither a class, an instance nor a slot mention is recorded as written
     * but produces no output.
     *
     * @throws IllegalArgumentException
     *             if a slot mention is of none of the recognized kinds
     */
    private static void buildMention(Element annotationsElement, SimpleInstance mention,
            Set<SimpleInstance> writtenMentions, MentionUtil mentionUtil) {
        // Set.add returns false when the mention was already recorded.
        if (!writtenMentions.add(mention)) {
            return;
        }

        if (mentionUtil.isClassMention(mention)) {
            buildClassMention(annotationsElement, mention, writtenMentions, mentionUtil);
        } else if (mentionUtil.isInstanceMention(mention)) {
            buildInstanceMention(annotationsElement, mention, writtenMentions, mentionUtil);
        } else if (mentionUtil.isSlotMention(mention)) {
            buildSlotMention(annotationsElement, mention, writtenMentions, mentionUtil);
        }
    }

    /**
     * Writes a class mention: its id, the mentioned class (id "NULL CLASS"
     * when the class is missing) and references to its valued slot mentions,
     * followed by those slot mentions themselves.
     */
    private static void buildClassMention(Element annotationsElement, SimpleInstance mention,
            Set<SimpleInstance> writtenMentions, MentionUtil mentionUtil) {
        Cls mentionCls = mentionUtil.getMentionCls(mention);

        Element classMentionElement = new Element(XMLConstants.CLASS_MENTION_ELEMENT_NAME);
        annotationsElement.addContent(classMentionElement);
        classMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());

        Element mentionClassElement = new Element(XMLConstants.MENTION_CLASS_ELEMENT_NAME);
        if (mentionCls != null) {
            mentionClassElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mentionCls.getName());
            mentionClassElement.addContent(mentionCls.getBrowserText());
        } else {
            mentionClassElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, "NULL CLASS");
        }
        classMentionElement.addContent(mentionClassElement);

        buildSlotMentionsOf(annotationsElement, classMentionElement, mention, writtenMentions, mentionUtil);
    }

    /**
     * Writes an instance mention: its id, the mentioned instance and
     * references to its valued slot mentions, followed by those slot
     * mentions themselves. Unlike class mentions, no placeholder id is
     * written when the mentioned instance is missing; the mention-instance
     * element is then left without attributes or content.
     */
    private static void buildInstanceMention(Element annotationsElement, SimpleInstance mention,
            Set<SimpleInstance> writtenMentions, MentionUtil mentionUtil) {
        Instance mentionInstance = mentionUtil.getMentionInstance(mention);

        Element instanceMentionElement = new Element(XMLConstants.INSTANCE_MENTION_ELEMENT_NAME);
        annotationsElement.addContent(instanceMentionElement);
        instanceMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());

        Element mentionInstanceElement = new Element(XMLConstants.MENTION_INSTANCE_ELEMENT_NAME);
        if (mentionInstance != null) {
            mentionInstanceElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mentionInstance.getName());
            mentionInstanceElement.addContent(mentionInstance.getBrowserText());
        }
        instanceMentionElement.addContent(mentionInstanceElement);

        buildSlotMentionsOf(annotationsElement, instanceMentionElement, mention, writtenMentions, mentionUtil);
    }

    /**
     * Adds one has-slot-mention reference to <code>ownerElement</code> for
     * every slot mention of <code>mention</code> that has a value, and then
     * writes those slot mentions. A mention may carry slot mentions without
     * any value; those are neither referenced nor written.
     */
    private static void buildSlotMentionsOf(Element annotationsElement, Element ownerElement,
            SimpleInstance mention, Set<SimpleInstance> writtenMentions, MentionUtil mentionUtil) {
        Collection<SimpleInstance> slotMentions = mentionUtil.getSlotMentions(mention);

        Collection<SimpleInstance> valuedSlotMentions = new ArrayList<SimpleInstance>();
        for (SimpleInstance slotMention : slotMentions) {
            if (mentionUtil.hasSlotValue(slotMention)) {
                Element hasSlotMentionElement = new Element(XMLConstants.HAS_SLOT_MENTION_ELEMENT_NAME);
                hasSlotMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, slotMention.getName());
                ownerElement.addContent(hasSlotMentionElement);
                valuedSlotMentions.add(slotMention);
            }
        }

        // All references are added before any slot mention is written, so
        // the owner element is complete before the recursion starts.
        for (SimpleInstance slotMention : valuedSlotMentions) {
            buildMention(annotationsElement, slotMention, writtenMentions, mentionUtil);
        }
    }

    /**
     * Writes a slot mention: its id, the mentioned slot (id "NULL SLOT" when
     * the slot is missing) and one value element per slot value. For complex
     * slot mentions each value is a mention; it is referenced by name and
     * written after all value elements of this slot mention.
     *
     * @throws IllegalArgumentException
     *             if the slot mention is not a complex, boolean, float,
     *             integer or string slot mention
     */
    private static void buildSlotMention(Element annotationsElement, SimpleInstance mention,
            Set<SimpleInstance> writtenMentions, MentionUtil mentionUtil) {
        Slot slot = mentionUtil.getSlotMentionSlot(mention);

        String slotMentionElementName = slotMentionElementName(mention, mentionUtil);
        if (slotMentionElementName == null) {
            // Fail with a message naming the offending mention rather than
            // passing a null element on to JDOM.
            throw new IllegalArgumentException("Slot mention '" + mention.getName()
                    + "' is not a complex, boolean, float, integer or string slot mention");
        }

        Element slotMentionElement = new Element(slotMentionElementName);
        annotationsElement.addContent(slotMentionElement);
        slotMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());

        Element mentionSlotElement = new Element(XMLConstants.MENTION_SLOT_ELEMENT_NAME);
        if (slot != null) {
            mentionSlotElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, slot.getName());
        } else {
            mentionSlotElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, "NULL SLOT");
        }
        slotMentionElement.addContent(mentionSlotElement);

        Collection<Object> slotValues = mentionUtil.getSlotMentionValues(mention);
        ArrayList<SimpleInstance> complexSlotValues = new ArrayList<SimpleInstance>();
        if (slotValues != null && !slotValues.isEmpty()) {
            if (mentionUtil.isComplexSlotMention(mention)) {
                for (Object slotValue : slotValues) {
                    SimpleInstance slotValueInstance = (SimpleInstance) slotValue;
                    Element complexValueElement = new Element(
                            XMLConstants.COMPLEX_SLOT_MENTION_VALUE_ELEMENT_NAME);
                    complexValueElement.setAttribute(XMLConstants.VALUE_ATTRIBUTE_NAME, slotValueInstance
                            .getName());
                    slotMentionElement.addContent(complexValueElement);
                    complexSlotValues.add(slotValueInstance);
                }
            } else {
                // The element name depends only on the kind of slot mention,
                // so it is determined once for all values.
                String valueElementName = simpleSlotValueElementName(mention, mentionUtil);
                if (valueElementName != null) {
                    for (Object slotValue : slotValues) {
                        Element simpleValueElement = new Element(valueElementName);
                        simpleValueElement.setAttribute(XMLConstants.VALUE_ATTRIBUTE_NAME, String
                                .valueOf(slotValue));
                        slotMentionElement.addContent(simpleValueElement);
                    }
                }
            }
        }

        for (SimpleInstance slotValueInstance : complexSlotValues) {
            buildMention(annotationsElement, slotValueInstance, writtenMentions, mentionUtil);
        }
    }

    /**
     * Returns the element name used for a slot mention of the given kind, or
     * <code>null</code> if the mention is of none of the recognized kinds.
     */
    private static String slotMentionElementName(SimpleInstance mention, MentionUtil mentionUtil) {
        if (mentionUtil.isComplexSlotMention(mention)) {
            return XMLConstants.COMPLEX_SLOT_MENTION_ELEMENT_NAME;
        }
        if (mentionUtil.isBooleanSlotMention(mention)) {
            return XMLConstants.BOOLEAN_SLOT_MENTION_ELEMENT_NAME;
        }
        if (mentionUtil.isFloatSlotMention(mention)) {
            return XMLConstants.FLOAT_SLOT_MENTION_ELEMENT_NAME;
        }
        if (mentionUtil.isIntegerSlotMention(mention)) {
            return XMLConstants.INTEGER_SLOT_MENTION_ELEMENT_NAME;
        }
        if (mentionUtil.isStringSlotMention(mention)) {
            return XMLConstants.STRING_SLOT_MENTION_ELEMENT_NAME;
        }
        return null;
    }

    /**
     * Returns the element name used for the values of a non-complex slot
     * mention, or <code>null</code> if the mention is not an integer,
     * boolean, string or float slot mention.
     */
    private static String simpleSlotValueElementName(SimpleInstance mention, MentionUtil mentionUtil) {
        if (mentionUtil.isIntegerSlotMention(mention)) {
            return XMLConstants.INTEGER_SLOT_MENTION_VALUE_ELEMENT_NAME;
        }
        if (mentionUtil.isBooleanSlotMention(mention)) {
            return XMLConstants.BOOLEAN_SLOT_MENTION_VALUE_ELEMENT_NAME;
        }
        if (mentionUtil.isStringSlotMention(mention)) {
            return XMLConstants.STRING_SLOT_MENTION_VALUE_ELEMENT_NAME;
        }
        if (mentionUtil.isFloatSlotMention(mention)) {
            return XMLConstants.FLOAT_SLOT_MENTION_VALUE_ELEMENT_NAME;
        }
        return null;
    }
}