001 /*
002 * The contents of this file are subject to the Mozilla Public
003 * License Version 1.1 (the "License"); you may not use this file
004 * except in compliance with the License. You may obtain a copy of
005 * the License at http://www.mozilla.org/MPL/
006 *
007 * Software distributed under the License is distributed on an "AS
008 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009 * implied. See the License for the specific language governing
010 * rights and limitations under the License.
011 *
012 * The Original Code is Knowtator.
013 *
014 * The Initial Developer of the Original Code is University of Colorado.
015 * Copyright (C) 2005 - 2008. All Rights Reserved.
016 *
017 * Knowtator was developed by the Center for Computational Pharmacology
018 * (http://compbio.uchcs.edu) at the University of Colorado Health
019 * Sciences Center School of Medicine with support from the National
020 * Library of Medicine.
021 *
022 * Current information about Knowtator can be obtained at
023 * http://knowtator.sourceforge.net/
024 *
025 * Contributor(s):
026 * Philip V. Ogren <philip@ogren.info> (Original Author)
027 */
028
029 package edu.uchsc.ccp.knowtator.xml;
030
031 import java.awt.Component;
032 import java.io.File;
033 import java.io.FileOutputStream;
034 import java.io.IOException;
035 import java.util.ArrayList;
036 import java.util.Collection;
037 import java.util.HashSet;
038 import java.util.Iterator;
039 import java.util.Set;
040
041 import javax.swing.JFileChooser;
042 import javax.swing.JOptionPane;
043
044 import org.jdom.Document;
045 import org.jdom.Element;
046 import org.jdom.output.Format;
047 import org.jdom.output.XMLOutputter;
048
049 import edu.stanford.smi.protege.model.Cls;
050 import edu.stanford.smi.protege.model.Instance;
051 import edu.stanford.smi.protege.model.KnowledgeBase;
052 import edu.stanford.smi.protege.model.Project;
053 import edu.stanford.smi.protege.model.SimpleInstance;
054 import edu.stanford.smi.protege.model.Slot;
055 import edu.stanford.smi.protege.ui.DisplayUtilities;
056 import edu.stanford.smi.protege.util.CollectionUtilities;
057 import edu.uchsc.ccp.knowtator.AnnotationUtil;
058 import edu.uchsc.ccp.knowtator.FilterUtil;
059 import edu.uchsc.ccp.knowtator.InvalidSpanException;
060 import edu.uchsc.ccp.knowtator.KnowtatorProjectUtil;
061 import edu.uchsc.ccp.knowtator.MentionUtil;
062 import edu.uchsc.ccp.knowtator.Span;
063 import edu.uchsc.ccp.knowtator.TextSourceUtil;
064
065 public class XMLExport {
066 public static final String XML_EXPORT_DIRECTORY = "XML_EXPORT_FILE";
067
068 private static File getRecentXMLExportDirectory(Project project) {
069 String path = (String) project.getClientInformation(XML_EXPORT_DIRECTORY);
070 if (path == null)
071 return null;
072
073 File xmlExportDirectory = new File(path);
074 if (xmlExportDirectory.exists() && xmlExportDirectory.isDirectory()) {
075 return xmlExportDirectory;
076 }
077 return null;
078 }
079
080 public static void writeToXML(Component parent, KnowledgeBase kb, KnowtatorProjectUtil kpu,
081 TextSourceUtil textSourceUtil, AnnotationUtil annotationUtil, MentionUtil mentionUtil,
082 FilterUtil filterUtil, Project project) {
083
084 int option = JOptionPane.showConfirmDialog(parent, "The following dialogs allow you to export \n"
085 + "a set of Knowtator annotations to XML.", "XML Export", JOptionPane.OK_CANCEL_OPTION);
086 if (option != JOptionPane.OK_OPTION)
087 return;
088
089 JFileChooser chooser = new JFileChooser();
090 chooser.setDialogTitle("Please choose a directory to write xml files to.");
091 chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
092
093 File recentXMLExportDirectory = getRecentXMLExportDirectory(project);
094 if (recentXMLExportDirectory != null) {
095 chooser.setCurrentDirectory(recentXMLExportDirectory.getParentFile());
096 }
097
098 int returnVal = chooser.showOpenDialog(parent);
099 if (returnVal != JFileChooser.APPROVE_OPTION)
100 return;
101
102 File directory = chooser.getSelectedFile();
103
104 project.setClientInformation(XML_EXPORT_DIRECTORY, directory.getPath());
105
106 SimpleInstance filter = (SimpleInstance) DisplayUtilities.pickInstance(parent, CollectionUtilities
107 .createCollection(kpu.getFilterCls()), "Select filter for annotations to be exported.");
108 if (filter == null)
109 return;
110
111 Collection textSources = DisplayUtilities.pickInstances(parent, kb, CollectionUtilities.createCollection(kpu
112 .getTextSourceCls()), "Select text sources from which to choose annotations for export.");
113
114 if (textSources == null || textSources.size() == 0)
115 return;
116
117 Iterator textSourcesItr = textSources.iterator();
118 while (textSourcesItr.hasNext()) {
119 SimpleInstance textSource = (SimpleInstance) textSourcesItr.next();
120 try {
121 writeToXML(textSource, filter, directory, annotationUtil, filterUtil, mentionUtil);
122 } catch (IOException ioe) {
123 JOptionPane.showMessageDialog(parent, ioe, "Exception thrown while opening file",
124 JOptionPane.ERROR_MESSAGE);
125 } catch (Exception e) {
126 JOptionPane.showMessageDialog(parent, e, "Exception thrown while exporting annotations to XML",
127 JOptionPane.ERROR_MESSAGE);
128 }
129 }
130 JOptionPane.showMessageDialog(parent, "XML export complete", "XML export complete",
131 JOptionPane.INFORMATION_MESSAGE);
132 }
133
134 public static void writeToXML(SimpleInstance textSource, SimpleInstance filter, File outputDirectory,
135 AnnotationUtil annotationUtil, FilterUtil filterUtil, MentionUtil mentionUtil) throws IOException {
136 Collection<SimpleInstance> annotations = new ArrayList<SimpleInstance>(annotationUtil
137 .getAnnotations(textSource));
138 annotations = filterUtil.filterAnnotations(annotations, filter);
139
140 Document xmlDocument = new Document();
141 buildXMLDocument(xmlDocument, annotations, textSource.getName(), annotationUtil, mentionUtil);
142
143 XMLOutputter xmlOut = new XMLOutputter(Format.getPrettyFormat());
144 xmlOut.output(xmlDocument, new FileOutputStream(new File(outputDirectory, textSource.getName()
145 + ".knowtator.xml")));
146 }
147
148 public static void buildXMLDocument(Document xmlDocument, Collection<SimpleInstance> annotations,
149 String textSourceName, AnnotationUtil annotationUtil, MentionUtil mentionUtil) {
150 Element annotationsElement = new Element(XMLConstants.ANNOTATIONS_ELEMENT_NAME);
151 xmlDocument.addContent(annotationsElement);
152
153 annotationsElement.setAttribute(XMLConstants.TEXT_SOURCE_ATTRIBUTE_NAME, textSourceName);
154
155 Set<SimpleInstance> mentions = new HashSet<SimpleInstance>();
156
157 for (SimpleInstance annotation : annotations) {
158 Element annotationElement = new Element(XMLConstants.ANNOTATION_ELEMENT_NAME);
159
160 SimpleInstance mention = annotationUtil.getMention(annotation);
161 if (mention != null) {
162 mentions.add(mention);
163 String mentionName = mention.getName();
164 Element mentionElement = new Element(XMLConstants.MENTION_ELEMENT_NAME);
165 mentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mentionName);
166 annotationElement.addContent(mentionElement);
167 }
168
169 SimpleInstance annotator = annotationUtil.getAnnotator(annotation);
170 if (annotator != null) {
171 String annotatorName = annotator.getName();
172 String annotatorText = annotator.getBrowserText();
173 Element annotatorElement = new Element(XMLConstants.ANNOTATOR_ELEMENT_NAME);
174 annotatorElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, annotatorName);
175 annotatorElement.addContent(annotatorText);
176 annotationElement.addContent(annotatorElement);
177 }
178
179 try {
180 java.util.List<Span> spans = annotationUtil.getSpans(annotation);
181 for (Span span : spans) {
182 Element spanElement = new Element(XMLConstants.SPAN_ELEMENT_NAME);
183 spanElement.setAttribute(XMLConstants.SPAN_START_ATTRIBUTE_NAME, "" + span.getStart());
184 spanElement.setAttribute(XMLConstants.SPAN_END_ATTRIBUTE_NAME, "" + span.getEnd());
185 annotationElement.addContent(spanElement);
186 }
187 } catch (InvalidSpanException ise) {
188 ise.printStackTrace(); // lame! this is where I've finally
189 // admitted to myself that
190 // InvalidSpanException is stupid and
191 // should be an error....
192 // throw some appropriate exception here.
193 }
194 String spannedText = annotationUtil.getText(annotation);
195 if (spannedText != null) {
196 Element spannedTextElement = new Element(XMLConstants.SPANNED_TEXT_ELEMENT_NAME);
197 spannedTextElement.addContent(spannedText);
198 annotationElement.addContent(spannedTextElement);
199 }
200 String comment = annotationUtil.getComment(annotation);
201 if (comment != null) {
202 Element commentElement = new Element(XMLConstants.COMMENT_ELEMENT_NAME);
203 commentElement.addContent(comment);
204 annotationElement.addContent(commentElement);
205 }
206
207 String creationDate = annotationUtil.getCreationDate(annotation);
208 if (creationDate != null) {
209 Element creationDateElement = new Element(XMLConstants.CREATION_DATE_ELEMENT_NAME);
210 creationDateElement.addContent(creationDate);
211 annotationElement.addContent(creationDateElement);
212 }
213
214 annotationsElement.addContent(annotationElement);
215 }
216
217 Set<SimpleInstance> writtenMentions = new HashSet<SimpleInstance>();
218 for (SimpleInstance mention : mentions) {
219 buildMention(annotationsElement, mention, writtenMentions, mentionUtil);
220 }
221
222 }
223
224 private static void buildMention(Element annotationsElement, SimpleInstance mention,
225 Set<SimpleInstance> writtenMentions, MentionUtil mentionUtil) {
226 if (!writtenMentions.contains(mention)) {
227 writtenMentions.add(mention);
228 if (mentionUtil.isClassMention(mention)) {
229 Cls mentionCls = mentionUtil.getMentionCls(mention);
230 Element classMentionElement = new Element(XMLConstants.CLASS_MENTION_ELEMENT_NAME);
231 annotationsElement.addContent(classMentionElement);
232 classMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());
233 Element mentionClassElement = new Element(XMLConstants.MENTION_CLASS_ELEMENT_NAME);
234 if (mentionCls != null) {
235 mentionClassElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mentionCls.getName());
236 mentionClassElement.addContent(mentionCls.getBrowserText());
237 } else
238 mentionClassElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, "NULL CLASS");
239
240 classMentionElement.addContent(mentionClassElement);
241
242 Collection<SimpleInstance> slotMentions = mentionUtil.getSlotMentions(mention);
243 for (SimpleInstance slotMention : slotMentions) {
244 // it is possible that the mention has slot mentions that do
245 // not have a value (i.e. a slot mention is there, the slot
246 // is filled in but there is not actually a value for the
247 // slot.
248 // we only want to write slot mentions that actually have
249 // values.
250 if (mentionUtil.hasSlotValue(slotMention)) {
251 Element hasSlotMentionElement = new Element(XMLConstants.HAS_SLOT_MENTION_ELEMENT_NAME);
252 hasSlotMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, slotMention.getName());
253 classMentionElement.addContent(hasSlotMentionElement);
254 }
255 }
256 for (SimpleInstance slotMention : slotMentions) {
257 if (mentionUtil.hasSlotValue(slotMention))
258 buildMention(annotationsElement, slotMention, writtenMentions, mentionUtil);
259 }
260 } else if (mentionUtil.isInstanceMention(mention)) {
261 Instance mentionInstance = mentionUtil.getMentionInstance(mention);
262 Element instanceMentionElement = new Element(XMLConstants.INSTANCE_MENTION_ELEMENT_NAME);
263 annotationsElement.addContent(instanceMentionElement);
264 instanceMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());
265 Element mentionInstanceElement = new Element(XMLConstants.MENTION_INSTANCE_ELEMENT_NAME);
266 if (mentionInstance != null) {
267 mentionInstanceElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mentionInstance.getName());
268 mentionInstanceElement.addContent(mentionInstance.getBrowserText());
269 }
270
271 instanceMentionElement.addContent(mentionInstanceElement);
272
273 Collection<SimpleInstance> slotMentions = mentionUtil.getSlotMentions(mention);
274 for (SimpleInstance slotMention : slotMentions) {
275 // we only want to write slot mentions that actually have
276 // values.
277 if (mentionUtil.hasSlotValue(slotMention)) {
278 Element hasSlotMentionElement = new Element(XMLConstants.HAS_SLOT_MENTION_ELEMENT_NAME);
279 hasSlotMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, slotMention.getName());
280 instanceMentionElement.addContent(hasSlotMentionElement);
281
282 }
283 }
284 for (SimpleInstance slotMention : slotMentions) {
285 if (mentionUtil.hasSlotValue(slotMention))
286 buildMention(annotationsElement, slotMention, writtenMentions, mentionUtil);
287 }
288 } else if (mentionUtil.isSlotMention(mention)) {
289 Slot slot = mentionUtil.getSlotMentionSlot(mention);
290
291 Element slotMentionElement = null;
292 if (mentionUtil.isComplexSlotMention(mention)) {
293 slotMentionElement = new Element(XMLConstants.COMPLEX_SLOT_MENTION_ELEMENT_NAME);
294 } else if (mentionUtil.isBooleanSlotMention(mention)) {
295 slotMentionElement = new Element(XMLConstants.BOOLEAN_SLOT_MENTION_ELEMENT_NAME);
296 } else if (mentionUtil.isFloatSlotMention(mention)) {
297 slotMentionElement = new Element(XMLConstants.FLOAT_SLOT_MENTION_ELEMENT_NAME);
298 } else if (mentionUtil.isIntegerSlotMention(mention)) {
299 slotMentionElement = new Element(XMLConstants.INTEGER_SLOT_MENTION_ELEMENT_NAME);
300 } else if (mentionUtil.isStringSlotMention(mention)) {
301 slotMentionElement = new Element(XMLConstants.STRING_SLOT_MENTION_ELEMENT_NAME);
302 }
303
304 annotationsElement.addContent(slotMentionElement);
305 slotMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());
306 Element mentionSlotElement = new Element(XMLConstants.MENTION_SLOT_ELEMENT_NAME);
307 if (slot != null)
308 mentionSlotElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, slot.getName());
309 else
310 mentionSlotElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, "NULL SLOT");
311
312 slotMentionElement.addContent(mentionSlotElement);
313
314 Collection<Object> slotValues = mentionUtil.getSlotMentionValues(mention);
315 ArrayList<SimpleInstance> complexSlotValues = new ArrayList<SimpleInstance>();
316 if (slotValues != null && slotValues.size() > 0) {
317 if (mentionUtil.isComplexSlotMention(mention)) {
318 for (Object slotValue : slotValues) {
319 SimpleInstance slotValueInstance = (SimpleInstance) slotValue;
320 Element complexSlotMentionValueElement = new Element(
321 XMLConstants.COMPLEX_SLOT_MENTION_VALUE_ELEMENT_NAME);
322 complexSlotMentionValueElement.setAttribute(XMLConstants.VALUE_ATTRIBUTE_NAME,
323 slotValueInstance.getName());
324 slotMentionElement.addContent(complexSlotMentionValueElement);
325 complexSlotValues.add(slotValueInstance);
326 }
327 } else {
328 for (Object slotValue : slotValues) {
329 Element simpleSlotMentionValueElement = null;
330 if (mentionUtil.isIntegerSlotMention(mention))
331 simpleSlotMentionValueElement = new Element(
332 XMLConstants.INTEGER_SLOT_MENTION_VALUE_ELEMENT_NAME);
333 else if (mentionUtil.isBooleanSlotMention(mention))
334 simpleSlotMentionValueElement = new Element(
335 XMLConstants.BOOLEAN_SLOT_MENTION_VALUE_ELEMENT_NAME);
336 else if (mentionUtil.isStringSlotMention(mention))
337 simpleSlotMentionValueElement = new Element(
338 XMLConstants.STRING_SLOT_MENTION_VALUE_ELEMENT_NAME);
339 else if (mentionUtil.isFloatSlotMention(mention))
340 simpleSlotMentionValueElement = new Element(
341 XMLConstants.FLOAT_SLOT_MENTION_VALUE_ELEMENT_NAME);
342 if (simpleSlotMentionValueElement != null) {
343 simpleSlotMentionValueElement.setAttribute(XMLConstants.VALUE_ATTRIBUTE_NAME, ""
344 + slotValue);
345 slotMentionElement.addContent(simpleSlotMentionValueElement);
346 }
347 }
348 }
349 }
350 for (SimpleInstance slotValueInstance : complexSlotValues) {
351 buildMention(annotationsElement, slotValueInstance, writtenMentions, mentionUtil);
352 }
353 }
354 }
355 }
356 }