001    /*
002     * The contents of this file are subject to the Mozilla Public
003     * License Version 1.1 (the "License"); you may not use this file
004     * except in compliance with the License. You may obtain a copy of
005     * the License at http://www.mozilla.org/MPL/
006     *
007     * Software distributed under the License is distributed on an "AS
008     * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
009     * implied. See the License for the specific language governing
010     * rights and limitations under the License.
011     *
012     * The Original Code is Knowtator.
013     *
014     * The Initial Developer of the Original Code is University of Colorado.  
015     * Copyright (C) 2005 - 2008.  All Rights Reserved.
016     *
017     * Knowtator was developed by the Center for Computational Pharmacology
018     * (http://compbio.uchsc.edu) at the University of Colorado Health 
019     *  Sciences Center School of Medicine with support from the National 
020     *  Library of Medicine.  
021     *
022     * Current information about Knowtator can be obtained at 
023     * http://knowtator.sourceforge.net/
024     *
025     * Contributor(s):
026     *   Philip V. Ogren <philip@ogren.info> (Original Author)
027     */
028    
029    package edu.uchsc.ccp.knowtator.xml;
030    
031    import java.awt.Component;
032    import java.io.File;
033    import java.io.FileOutputStream;
034    import java.io.IOException;
035    import java.util.ArrayList;
036    import java.util.Collection;
037    import java.util.HashSet;
038    import java.util.Iterator;
039    import java.util.Set;
040    
041    import javax.swing.JFileChooser;
042    import javax.swing.JOptionPane;
043    
044    import org.jdom.Document;
045    import org.jdom.Element;
046    import org.jdom.output.Format;
047    import org.jdom.output.XMLOutputter;
048    
049    import edu.stanford.smi.protege.model.Cls;
050    import edu.stanford.smi.protege.model.Instance;
051    import edu.stanford.smi.protege.model.KnowledgeBase;
052    import edu.stanford.smi.protege.model.Project;
053    import edu.stanford.smi.protege.model.SimpleInstance;
054    import edu.stanford.smi.protege.model.Slot;
055    import edu.stanford.smi.protege.ui.DisplayUtilities;
056    import edu.stanford.smi.protege.util.CollectionUtilities;
057    import edu.uchsc.ccp.knowtator.AnnotationUtil;
058    import edu.uchsc.ccp.knowtator.FilterUtil;
059    import edu.uchsc.ccp.knowtator.InvalidSpanException;
060    import edu.uchsc.ccp.knowtator.KnowtatorProjectUtil;
061    import edu.uchsc.ccp.knowtator.MentionUtil;
062    import edu.uchsc.ccp.knowtator.Span;
063    import edu.uchsc.ccp.knowtator.TextSourceUtil;
064    
065    public class XMLExport {
066            public static final String XML_EXPORT_DIRECTORY = "XML_EXPORT_FILE";
067    
068            private static File getRecentXMLExportDirectory(Project project) {
069                    String path = (String) project.getClientInformation(XML_EXPORT_DIRECTORY);
070                    if (path == null)
071                            return null;
072    
073                    File xmlExportDirectory = new File(path);
074                    if (xmlExportDirectory.exists() && xmlExportDirectory.isDirectory()) {
075                            return xmlExportDirectory;
076                    }
077                    return null;
078            }
079    
080            public static void writeToXML(Component parent, KnowledgeBase kb, KnowtatorProjectUtil kpu,
081                            TextSourceUtil textSourceUtil, AnnotationUtil annotationUtil, MentionUtil mentionUtil,
082                            FilterUtil filterUtil, Project project) {
083    
084                    int option = JOptionPane.showConfirmDialog(parent, "The following dialogs allow you to export \n"
085                                    + "a set of Knowtator annotations to XML.", "XML Export", JOptionPane.OK_CANCEL_OPTION);
086                    if (option != JOptionPane.OK_OPTION)
087                            return;
088    
089                    JFileChooser chooser = new JFileChooser();
090                    chooser.setDialogTitle("Please choose a directory to write xml files to.");
091                    chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
092    
093                    File recentXMLExportDirectory = getRecentXMLExportDirectory(project);
094                    if (recentXMLExportDirectory != null) {
095                            chooser.setCurrentDirectory(recentXMLExportDirectory.getParentFile());
096                    }
097    
098                    int returnVal = chooser.showOpenDialog(parent);
099                    if (returnVal != JFileChooser.APPROVE_OPTION)
100                            return;
101    
102                    File directory = chooser.getSelectedFile();
103    
104                    project.setClientInformation(XML_EXPORT_DIRECTORY, directory.getPath());
105    
106                    SimpleInstance filter = (SimpleInstance) DisplayUtilities.pickInstance(parent, CollectionUtilities
107                                    .createCollection(kpu.getFilterCls()), "Select filter for annotations to be exported.");
108                    if (filter == null)
109                            return;
110    
111                    Collection textSources = DisplayUtilities.pickInstances(parent, kb, CollectionUtilities.createCollection(kpu
112                                    .getTextSourceCls()), "Select text sources from which to choose annotations for export.");
113    
114                    if (textSources == null || textSources.size() == 0)
115                            return;
116    
117                    Iterator textSourcesItr = textSources.iterator();
118                    while (textSourcesItr.hasNext()) {
119                            SimpleInstance textSource = (SimpleInstance) textSourcesItr.next();
120                            try {
121                                    writeToXML(textSource, filter, directory, annotationUtil, filterUtil, mentionUtil);
122                            } catch (IOException ioe) {
123                                    JOptionPane.showMessageDialog(parent, ioe, "Exception thrown while opening file",
124                                                    JOptionPane.ERROR_MESSAGE);
125                            } catch (Exception e) {
126                                    JOptionPane.showMessageDialog(parent, e, "Exception thrown while exporting annotations to XML",
127                                                    JOptionPane.ERROR_MESSAGE);
128                            }
129                    }
130                    JOptionPane.showMessageDialog(parent, "XML export complete", "XML export complete",
131                                    JOptionPane.INFORMATION_MESSAGE);
132            }
133    
134            public static void writeToXML(SimpleInstance textSource, SimpleInstance filter, File outputDirectory,
135                            AnnotationUtil annotationUtil, FilterUtil filterUtil, MentionUtil mentionUtil) throws IOException {
136                    Collection<SimpleInstance> annotations = new ArrayList<SimpleInstance>(annotationUtil
137                                    .getAnnotations(textSource));
138                    annotations = filterUtil.filterAnnotations(annotations, filter);
139    
140                    Document xmlDocument = new Document();
141                    buildXMLDocument(xmlDocument, annotations, textSource.getName(), annotationUtil, mentionUtil);
142    
143                    XMLOutputter xmlOut = new XMLOutputter(Format.getPrettyFormat());
144                    xmlOut.output(xmlDocument, new FileOutputStream(new File(outputDirectory, textSource.getName()
145                                    + ".knowtator.xml")));
146            }
147    
148            public static void buildXMLDocument(Document xmlDocument, Collection<SimpleInstance> annotations,
149                            String textSourceName, AnnotationUtil annotationUtil, MentionUtil mentionUtil) {
150                    Element annotationsElement = new Element(XMLConstants.ANNOTATIONS_ELEMENT_NAME);
151                    xmlDocument.addContent(annotationsElement);
152    
153                    annotationsElement.setAttribute(XMLConstants.TEXT_SOURCE_ATTRIBUTE_NAME, textSourceName);
154    
155                    Set<SimpleInstance> mentions = new HashSet<SimpleInstance>();
156    
157                    for (SimpleInstance annotation : annotations) {
158                            Element annotationElement = new Element(XMLConstants.ANNOTATION_ELEMENT_NAME);
159    
160                            SimpleInstance mention = annotationUtil.getMention(annotation);
161                            if (mention != null) {
162                                    mentions.add(mention);
163                                    String mentionName = mention.getName();
164                                    Element mentionElement = new Element(XMLConstants.MENTION_ELEMENT_NAME);
165                                    mentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mentionName);
166                                    annotationElement.addContent(mentionElement);
167                            }
168    
169                            SimpleInstance annotator = annotationUtil.getAnnotator(annotation);
170                            if (annotator != null) {
171                                    String annotatorName = annotator.getName();
172                                    String annotatorText = annotator.getBrowserText();
173                                    Element annotatorElement = new Element(XMLConstants.ANNOTATOR_ELEMENT_NAME);
174                                    annotatorElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, annotatorName);
175                                    annotatorElement.addContent(annotatorText);
176                                    annotationElement.addContent(annotatorElement);
177                            }
178    
179                            try {
180                                    java.util.List<Span> spans = annotationUtil.getSpans(annotation);
181                                    for (Span span : spans) {
182                                            Element spanElement = new Element(XMLConstants.SPAN_ELEMENT_NAME);
183                                            spanElement.setAttribute(XMLConstants.SPAN_START_ATTRIBUTE_NAME, "" + span.getStart());
184                                            spanElement.setAttribute(XMLConstants.SPAN_END_ATTRIBUTE_NAME, "" + span.getEnd());
185                                            annotationElement.addContent(spanElement);
186                                    }
187                            } catch (InvalidSpanException ise) {
188                                    ise.printStackTrace(); // lame! this is where I've finally
189                                    // admitted to myself that
190                                    // InvalidSpanException is stupid and
191                                    // should be an error....
192                                    // throw some appropriate exception here.
193                            }
194                            String spannedText = annotationUtil.getText(annotation);
195                            if (spannedText != null) {
196                                    Element spannedTextElement = new Element(XMLConstants.SPANNED_TEXT_ELEMENT_NAME);
197                                    spannedTextElement.addContent(spannedText);
198                                    annotationElement.addContent(spannedTextElement);
199                            }
200                            String comment = annotationUtil.getComment(annotation);
201                            if (comment != null) {
202                                    Element commentElement = new Element(XMLConstants.COMMENT_ELEMENT_NAME);
203                                    commentElement.addContent(comment);
204                                    annotationElement.addContent(commentElement);
205                            }
206    
207                            String creationDate = annotationUtil.getCreationDate(annotation);
208                            if (creationDate != null) {
209                                    Element creationDateElement = new Element(XMLConstants.CREATION_DATE_ELEMENT_NAME);
210                                    creationDateElement.addContent(creationDate);
211                                    annotationElement.addContent(creationDateElement);
212                            }
213    
214                            annotationsElement.addContent(annotationElement);
215                    }
216    
217                    Set<SimpleInstance> writtenMentions = new HashSet<SimpleInstance>();
218                    for (SimpleInstance mention : mentions) {
219                            buildMention(annotationsElement, mention, writtenMentions, mentionUtil);
220                    }
221    
222            }
223    
224            private static void buildMention(Element annotationsElement, SimpleInstance mention,
225                            Set<SimpleInstance> writtenMentions, MentionUtil mentionUtil) {
226                    if (!writtenMentions.contains(mention)) {
227                            writtenMentions.add(mention);
228                            if (mentionUtil.isClassMention(mention)) {
229                                    Cls mentionCls = mentionUtil.getMentionCls(mention);
230                                    Element classMentionElement = new Element(XMLConstants.CLASS_MENTION_ELEMENT_NAME);
231                                    annotationsElement.addContent(classMentionElement);
232                                    classMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());
233                                    Element mentionClassElement = new Element(XMLConstants.MENTION_CLASS_ELEMENT_NAME);
234                                    if (mentionCls != null) {
235                                            mentionClassElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mentionCls.getName());
236                                            mentionClassElement.addContent(mentionCls.getBrowserText());
237                                    } else
238                                            mentionClassElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, "NULL CLASS");
239    
240                                    classMentionElement.addContent(mentionClassElement);
241    
242                                    Collection<SimpleInstance> slotMentions = mentionUtil.getSlotMentions(mention);
243                                    for (SimpleInstance slotMention : slotMentions) {
244                                            // it is possible that the mention has slot mentions that do
245                                            // not have a value (i.e. a slot mention is there, the slot
246                                            // is filled in but there is not actually a value for the
247                                            // slot.
248                                            // we only want to write slot mentions that actually have
249                                            // values.
250                                            if (mentionUtil.hasSlotValue(slotMention)) {
251                                                    Element hasSlotMentionElement = new Element(XMLConstants.HAS_SLOT_MENTION_ELEMENT_NAME);
252                                                    hasSlotMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, slotMention.getName());
253                                                    classMentionElement.addContent(hasSlotMentionElement);
254                                            }
255                                    }
256                                    for (SimpleInstance slotMention : slotMentions) {
257                                            if (mentionUtil.hasSlotValue(slotMention))
258                                                    buildMention(annotationsElement, slotMention, writtenMentions, mentionUtil);
259                                    }
260                            } else if (mentionUtil.isInstanceMention(mention)) {
261                                    Instance mentionInstance = mentionUtil.getMentionInstance(mention);
262                                    Element instanceMentionElement = new Element(XMLConstants.INSTANCE_MENTION_ELEMENT_NAME);
263                                    annotationsElement.addContent(instanceMentionElement);
264                                    instanceMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());
265                                    Element mentionInstanceElement = new Element(XMLConstants.MENTION_INSTANCE_ELEMENT_NAME);
266                                    if (mentionInstance != null) {
267                                            mentionInstanceElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mentionInstance.getName());
268                                            mentionInstanceElement.addContent(mentionInstance.getBrowserText());
269                                    }
270    
271                                    instanceMentionElement.addContent(mentionInstanceElement);
272    
273                                    Collection<SimpleInstance> slotMentions = mentionUtil.getSlotMentions(mention);
274                                    for (SimpleInstance slotMention : slotMentions) {
275                                            // we only want to write slot mentions that actually have
276                                            // values.
277                                            if (mentionUtil.hasSlotValue(slotMention)) {
278                                                    Element hasSlotMentionElement = new Element(XMLConstants.HAS_SLOT_MENTION_ELEMENT_NAME);
279                                                    hasSlotMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, slotMention.getName());
280                                                    instanceMentionElement.addContent(hasSlotMentionElement);
281    
282                                            }
283                                    }
284                                    for (SimpleInstance slotMention : slotMentions) {
285                                            if (mentionUtil.hasSlotValue(slotMention))
286                                                    buildMention(annotationsElement, slotMention, writtenMentions, mentionUtil);
287                                    }
288                            } else if (mentionUtil.isSlotMention(mention)) {
289                                    Slot slot = mentionUtil.getSlotMentionSlot(mention);
290    
291                                    Element slotMentionElement = null;
292                                    if (mentionUtil.isComplexSlotMention(mention)) {
293                                            slotMentionElement = new Element(XMLConstants.COMPLEX_SLOT_MENTION_ELEMENT_NAME);
294                                    } else if (mentionUtil.isBooleanSlotMention(mention)) {
295                                            slotMentionElement = new Element(XMLConstants.BOOLEAN_SLOT_MENTION_ELEMENT_NAME);
296                                    } else if (mentionUtil.isFloatSlotMention(mention)) {
297                                            slotMentionElement = new Element(XMLConstants.FLOAT_SLOT_MENTION_ELEMENT_NAME);
298                                    } else if (mentionUtil.isIntegerSlotMention(mention)) {
299                                            slotMentionElement = new Element(XMLConstants.INTEGER_SLOT_MENTION_ELEMENT_NAME);
300                                    } else if (mentionUtil.isStringSlotMention(mention)) {
301                                            slotMentionElement = new Element(XMLConstants.STRING_SLOT_MENTION_ELEMENT_NAME);
302                                    }
303    
304                                    annotationsElement.addContent(slotMentionElement);
305                                    slotMentionElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, mention.getName());
306                                    Element mentionSlotElement = new Element(XMLConstants.MENTION_SLOT_ELEMENT_NAME);
307                                    if (slot != null)
308                                            mentionSlotElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, slot.getName());
309                                    else
310                                            mentionSlotElement.setAttribute(XMLConstants.ID_ATTRIBUTE_NAME, "NULL SLOT");
311    
312                                    slotMentionElement.addContent(mentionSlotElement);
313    
314                                    Collection<Object> slotValues = mentionUtil.getSlotMentionValues(mention);
315                                    ArrayList<SimpleInstance> complexSlotValues = new ArrayList<SimpleInstance>();
316                                    if (slotValues != null && slotValues.size() > 0) {
317                                            if (mentionUtil.isComplexSlotMention(mention)) {
318                                                    for (Object slotValue : slotValues) {
319                                                            SimpleInstance slotValueInstance = (SimpleInstance) slotValue;
320                                                            Element complexSlotMentionValueElement = new Element(
321                                                                            XMLConstants.COMPLEX_SLOT_MENTION_VALUE_ELEMENT_NAME);
322                                                            complexSlotMentionValueElement.setAttribute(XMLConstants.VALUE_ATTRIBUTE_NAME,
323                                                                            slotValueInstance.getName());
324                                                            slotMentionElement.addContent(complexSlotMentionValueElement);
325                                                            complexSlotValues.add(slotValueInstance);
326                                                    }
327                                            } else {
328                                                    for (Object slotValue : slotValues) {
329                                                            Element simpleSlotMentionValueElement = null;
330                                                            if (mentionUtil.isIntegerSlotMention(mention))
331                                                                    simpleSlotMentionValueElement = new Element(
332                                                                                    XMLConstants.INTEGER_SLOT_MENTION_VALUE_ELEMENT_NAME);
333                                                            else if (mentionUtil.isBooleanSlotMention(mention))
334                                                                    simpleSlotMentionValueElement = new Element(
335                                                                                    XMLConstants.BOOLEAN_SLOT_MENTION_VALUE_ELEMENT_NAME);
336                                                            else if (mentionUtil.isStringSlotMention(mention))
337                                                                    simpleSlotMentionValueElement = new Element(
338                                                                                    XMLConstants.STRING_SLOT_MENTION_VALUE_ELEMENT_NAME);
339                                                            else if (mentionUtil.isFloatSlotMention(mention))
340                                                                    simpleSlotMentionValueElement = new Element(
341                                                                                    XMLConstants.FLOAT_SLOT_MENTION_VALUE_ELEMENT_NAME);
342                                                            if (simpleSlotMentionValueElement != null) {
343                                                                    simpleSlotMentionValueElement.setAttribute(XMLConstants.VALUE_ATTRIBUTE_NAME, ""
344                                                                                    + slotValue);
345                                                                    slotMentionElement.addContent(simpleSlotMentionValueElement);
346                                                            }
347                                                    }
348                                            }
349                                    }
350                                    for (SimpleInstance slotValueInstance : complexSlotValues) {
351                                            buildMention(annotationsElement, slotValueInstance, writtenMentions, mentionUtil);
352                                    }
353                            }
354                    }
355            }
356    }