View Javadoc

1   /*
2    * Copyright 2015 Data Archiving and Networked Services (an institute of
3    * Koninklijke Nederlandse Akademie van Wetenschappen), King's College London,
4    * Georg-August-Universitaet Goettingen Stiftung Oeffentlichen Rechts
5    *
6    * Licensed under the EUPL, Version 1.1 or – as soon they will be approved by
7    * the European Commission - subsequent versions of the EUPL (the "Licence");
8    * You may not use this work except in compliance with the Licence.
9    * You may obtain a copy of the Licence at:
10   *
11   * https://joinup.ec.europa.eu/software/page/eupl
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the Licence is distributed on an "AS IS" basis,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the Licence for the specific language governing
17   * permissions and limitations under the Licence.
18   */
19  
20  package eu.ehri.project.importers.managers;
21  
22  import com.google.common.collect.Lists;
23  import com.tinkerpop.frames.FramedGraph;
24  import eu.ehri.project.exceptions.ValidationError;
25  import eu.ehri.project.importers.ImportCallback;
26  import eu.ehri.project.importers.ImportLog;
27  import eu.ehri.project.importers.base.ItemImporter;
28  import eu.ehri.project.importers.base.SaxXmlHandler;
29  import eu.ehri.project.importers.exceptions.InputParseError;
30  import eu.ehri.project.importers.properties.XmlImportProperties;
31  import eu.ehri.project.models.base.Actioner;
32  import eu.ehri.project.models.base.PermissionScope;
33  import eu.ehri.project.persistence.ActionManager;
34  import org.slf4j.Logger;
35  import org.slf4j.LoggerFactory;
36  import org.xml.sax.InputSource;
37  import org.xml.sax.SAXException;
38  
39  import javax.xml.parsers.ParserConfigurationException;
40  import javax.xml.parsers.SAXParser;
41  import javax.xml.parsers.SAXParserFactory;
42  import java.io.IOException;
43  import java.io.InputStream;
44  import java.lang.reflect.InvocationTargetException;
45  import java.util.List;
46  import java.util.Map;
47  
48  /**
49   * Class that provides a front-end for importing XML files like EAD and EAC and
50   * nested lists of EAD documents into the graph.
51   */
52  public class SaxImportManager extends AbstractImportManager {
53  
54      private static final Logger logger = LoggerFactory.getLogger(SaxImportManager.class);
55  
56      private final Class<? extends SaxXmlHandler> handlerClass;
57      private final XmlImportProperties properties;
58      private final List<ImportCallback> extraCallbacks;
59  
60      /**
61       * Constructor.
62       *
63       * @param graph    the framed graph
64       * @param scope    the permission scope
65       * @param actioner the actioner
66       */
67      public SaxImportManager(FramedGraph<?> graph,
68              PermissionScope scope,
69              Actioner actioner,
70              boolean tolerant,
71              boolean allowUpdates,
72              Class<? extends ItemImporter> importerClass,
73              Class<? extends SaxXmlHandler> handlerClass,
74              XmlImportProperties properties,
75              List<ImportCallback> callbacks) {
76          super(graph, scope, actioner, tolerant, allowUpdates, importerClass);
77          this.handlerClass = handlerClass;
78          this.properties = properties;
79          this.extraCallbacks = Lists.newArrayList(callbacks);
80          logger.info("importer used: " + importerClass);
81          logger.info("handler used: " + handlerClass);
82      }
83  
84      /**
85       * Constructor.
86       *
87       * @param graph    the framed graph
88       * @param scope    a permission scope
89       * @param actioner the actioner
90       */
91      public SaxImportManager(FramedGraph<?> graph,
92              PermissionScope scope, Actioner actioner,
93              boolean tolerant,
94              boolean allowUpdates,
95              Class<? extends ItemImporter> importerClass, Class<? extends SaxXmlHandler> handlerClass,
96              List<ImportCallback> callbacks) {
97          this(graph, scope, actioner, tolerant, allowUpdates, importerClass, handlerClass, null,
98                  callbacks);
99      }
100 
101     /**
102      * Constructor.
103      *
104      * @param graph    the framed graph
105      * @param scope    a permission scope
106      * @param actioner the actioner
107      */
108     public SaxImportManager(FramedGraph<?> graph,
109             PermissionScope scope, Actioner actioner,
110             boolean tolerant,
111             boolean allowUpdates,
112             Class<? extends ItemImporter> importerClass, Class<? extends SaxXmlHandler> handlerClass,
113             XmlImportProperties properties) {
114         this(graph, scope, actioner, tolerant, allowUpdates, importerClass, handlerClass,
115                 properties,
116                 Lists.<ImportCallback>newArrayList());
117     }
118 
119     /**
120      * Constructor.
121      *
122      * @param graph    the framed graph
123      * @param scope    a permission scope
124      * @param actioner the actioner
125      */
126     public SaxImportManager(FramedGraph<?> graph,
127             PermissionScope scope, Actioner actioner,
128             Class<? extends ItemImporter> importerClass, Class<? extends SaxXmlHandler> handlerClass) {
129         this(graph, scope, actioner, false, false, importerClass, handlerClass, Lists
130                 .<ImportCallback>newArrayList());
131     }
132 
133     /**
134      * Import XML from the given InputStream, as part of the given action.
135      *
136      * @param stream  an input stream
137      * @param context the event context
138      * @param log     a logger object
139      */
140     @Override
141     protected void importInputStream(final InputStream stream, final String tag, final ActionManager.EventContext context,
142             final ImportLog log) throws IOException, ValidationError, InputParseError {
143         try {
144             ItemImporter<Map<String, Object>, ?> importer = importerClass
145                     .getConstructor(FramedGraph.class, PermissionScope.class,
146                             Actioner.class, ImportLog.class)
147                     .newInstance(framedGraph, permissionScope, actioner, log);
148 
149             for (ImportCallback callback : extraCallbacks) {
150                 importer.addCallback(callback);
151             }
152 
153             importer.addCallback(mutation -> defaultImportCallback(log, context, mutation));
154             importer.addErrorCallback(ex -> defaultErrorCallback(log, ex));
155 
156             //TODO decide which handler to use, HandlerFactory? now part of constructor ...
157             SaxXmlHandler handler = properties != null
158                     ? handlerClass.getConstructor(ItemImporter.class, XmlImportProperties.class)
159                     .newInstance(importer, properties)
160                     : handlerClass.getConstructor(ItemImporter.class).newInstance(importer);
161 
162             SAXParserFactory spf = SAXParserFactory.newInstance();
163             spf.setNamespaceAware(false);
164             if (isTolerant()) {
165                 logger.debug("Turning off validation and setting schema to null");
166                 spf.setValidating(false);
167                 spf.setSchema(null);
168             }
169             logger.debug("isValidating: " + spf.isValidating());
170             SAXParser saxParser = spf.newSAXParser();
171             saxParser.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
172             InputSource src = new InputSource(stream);
173             src.setSystemId(tag);
174             saxParser.parse(src, handler);
175         } catch (InstantiationException | IllegalAccessException | InvocationTargetException |
176                 NoSuchMethodException | SecurityException |
177                 ParserConfigurationException e) {
178             // In normal operation these should not be thrown
179             throw new RuntimeException(e);
180         } catch (SAXException e) {
181             // Something was wrong with the XML...
182             throw new InputParseError(e);
183         } catch (RuntimeException e) {
184             if (e.getCause() instanceof ValidationError) {
185                 throw (ValidationError)e.getCause();
186             } else {
187                 throw e;
188             }
189         }
190     }
191 
192     public SaxImportManager withProperties(String properties) {
193         XmlImportProperties xmlImportProperties = properties == null ? null : new XmlImportProperties(properties);
194         return new SaxImportManager(framedGraph, permissionScope, actioner, tolerant, allowUpdates, importerClass,
195                 handlerClass, xmlImportProperties, extraCallbacks);
196     }
197 
198     public SaxImportManager setTolerant(boolean tolerant) {
199         return new SaxImportManager(framedGraph, permissionScope, actioner, tolerant,
200                 allowUpdates, importerClass, handlerClass, properties, extraCallbacks);
201     }
202 
203     public SaxImportManager allowUpdates(boolean allowUpdates) {
204         return new SaxImportManager(framedGraph, permissionScope, actioner, tolerant,
205                 allowUpdates, importerClass, handlerClass, properties, extraCallbacks);
206     }
207 
208     public SaxImportManager withScope(PermissionScope scope) {
209         return new SaxImportManager(framedGraph, scope, actioner, tolerant,
210                 allowUpdates, importerClass, handlerClass, properties, extraCallbacks);
211     }
212 
213     public SaxImportManager withCallback(ImportCallback callback) {
214         List<ImportCallback> newCbs = Lists.newArrayList(extraCallbacks);
215         newCbs.add(callback);
216         return new SaxImportManager(framedGraph, permissionScope, actioner, tolerant,
217                 allowUpdates, importerClass, handlerClass, properties, newCbs);
218     }
219 }