1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package eu.ehri.extension;
21
22 import com.google.common.base.Charsets;
23 import com.google.common.base.Splitter;
24 import com.google.common.collect.Lists;
25 import eu.ehri.extension.base.AbstractResource;
26 import eu.ehri.project.core.Tx;
27 import eu.ehri.project.exceptions.DeserializationError;
28 import eu.ehri.project.exceptions.ItemNotFound;
29 import eu.ehri.project.exceptions.ValidationError;
30 import eu.ehri.project.importers.ImportLog;
31 import eu.ehri.project.importers.base.ItemImporter;
32 import eu.ehri.project.importers.base.SaxXmlHandler;
33 import eu.ehri.project.importers.cvoc.SkosImporter;
34 import eu.ehri.project.importers.cvoc.SkosImporterFactory;
35 import eu.ehri.project.importers.eac.EacHandler;
36 import eu.ehri.project.importers.eac.EacImporter;
37 import eu.ehri.project.importers.ead.EadHandler;
38 import eu.ehri.project.importers.ead.EadImporter;
39 import eu.ehri.project.importers.ead.EadSync;
40 import eu.ehri.project.importers.ead.SyncLog;
41 import eu.ehri.project.importers.eag.EagHandler;
42 import eu.ehri.project.importers.eag.EagImporter;
43 import eu.ehri.project.importers.exceptions.InputParseError;
44 import eu.ehri.project.importers.links.LinkImporter;
45 import eu.ehri.project.importers.managers.CsvImportManager;
46 import eu.ehri.project.importers.managers.ImportManager;
47 import eu.ehri.project.importers.managers.SaxImportManager;
48 import eu.ehri.project.models.base.Actioner;
49 import eu.ehri.project.models.base.PermissionScope;
50 import eu.ehri.project.models.cvoc.Vocabulary;
51 import eu.ehri.project.utils.Table;
52 import org.apache.commons.compress.archivers.ArchiveException;
53 import org.apache.commons.compress.archivers.ArchiveInputStream;
54 import org.apache.commons.compress.archivers.ArchiveStreamFactory;
55 import org.apache.commons.io.IOUtils;
56 import org.apache.jena.shared.NoReaderForLangException;
57 import org.neo4j.graphdb.GraphDatabaseService;
58 import org.slf4j.Logger;
59 import org.slf4j.LoggerFactory;
60
61 import javax.ws.rs.Consumes;
62 import javax.ws.rs.DefaultValue;
63 import javax.ws.rs.POST;
64 import javax.ws.rs.Path;
65 import javax.ws.rs.Produces;
66 import javax.ws.rs.QueryParam;
67 import javax.ws.rs.core.Context;
68 import javax.ws.rs.core.MediaType;
69 import java.io.BufferedInputStream;
70 import java.io.EOFException;
71 import java.io.IOException;
72 import java.io.InputStream;
73 import java.nio.charset.StandardCharsets;
74 import java.nio.file.Files;
75 import java.nio.file.Paths;
76 import java.util.List;
77 import java.util.Optional;
78 import java.util.Set;
79 import java.util.zip.GZIPInputStream;
80
81
82
83
84 @Path(ImportResource.ENDPOINT)
85 public class ImportResource extends AbstractResource {
86
/** Path segment of this resource; referenced by the class-level {@code @Path} annotation. */
public static final String ENDPOINT = "import";

// Names of the query parameters accepted by the import endpoints.
public static final String BASE_URI_PARAM = "baseURI";
public static final String URI_SUFFIX_PARAM = "suffix";
public static final String ALLOW_UPDATES_PARAM = "allow-update";
public static final String HANDLER_PARAM = "handler";
public static final String IMPORTER_PARAM = "importer";
public static final String PROPERTIES_PARAM = "properties";
public static final String FORMAT_PARAM = "format";

private static final Logger logger = LoggerFactory.getLogger(ImportResource.class);

// Fully-qualified class names used when a request names no handler/importer.
private static final String DEFAULT_EAD_HANDLER = EadHandler.class.getName();
private static final String DEFAULT_EAD_IMPORTER = EadImporter.class.getName();

/**
 * Create the resource on top of the injected graph database.
 *
 * @param database the graph database service supplied by the container
 */
public ImportResource(@Context GraphDatabaseService database) {
    super(database);
}
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135 @POST
136 @Path("skos")
137 public ImportLog importSkos(
138 @QueryParam(SCOPE_PARAM) String scopeId,
139 @DefaultValue("false") @QueryParam(TOLERANT_PARAM) Boolean tolerant,
140 @QueryParam(BASE_URI_PARAM) String baseURI,
141 @QueryParam(URI_SUFFIX_PARAM) String uriSuffix,
142 @QueryParam(LOG_PARAM) String logMessage,
143 @QueryParam(FORMAT_PARAM) String format,
144 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit,
145 InputStream stream)
146 throws ItemNotFound, ValidationError, IOException, DeserializationError {
147 try (final Tx tx = beginTx()) {
148
149
150 Actioner user = getCurrentActioner();
151 Vocabulary scope = manager.getEntity(scopeId, Vocabulary.class);
152 SkosImporter importer = SkosImporterFactory.newSkosImporter(graph, user, scope);
153
154 ImportLog log = importer
155 .setFormat(format)
156 .setTolerant(tolerant)
157 .setBaseURI(baseURI)
158 .setURISuffix(uriSuffix)
159 .importFile(stream, getLogMessage(logMessage).orElse(null));
160 if (commit) {
161 logger.debug("Committing SKOS import transaction...");
162 tx.success();
163 }
164 return log;
165 } catch (InputParseError e) {
166 throw new DeserializationError("Unable to parse input: " + e.getMessage());
167 } catch (NoReaderForLangException e) {
168 throw new DeserializationError("Unable to read language: " + format);
169 }
170 }
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221 @POST
222 @Consumes({MediaType.TEXT_PLAIN, MediaType.APPLICATION_XML,
223 MediaType.TEXT_XML, MediaType.APPLICATION_OCTET_STREAM})
224 @Path("ead")
225 public ImportLog importEad(
226 @QueryParam(SCOPE_PARAM) String scopeId,
227 @DefaultValue("false") @QueryParam(TOLERANT_PARAM) Boolean tolerant,
228 @DefaultValue("false") @QueryParam(ALLOW_UPDATES_PARAM) Boolean allowUpdates,
229 @QueryParam(LOG_PARAM) String logMessage,
230 @QueryParam(PROPERTIES_PARAM) String propertyFile,
231 @QueryParam(HANDLER_PARAM) String handlerClass,
232 @QueryParam(IMPORTER_PARAM) String importerClass,
233 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit,
234 InputStream data)
235 throws ItemNotFound, ValidationError, IOException, DeserializationError {
236
237 try (final Tx tx = beginTx()) {
238 checkPropertyFile(propertyFile);
239 Class<? extends SaxXmlHandler> handler
240 = getHandlerCls(handlerClass, DEFAULT_EAD_HANDLER);
241 Class<? extends ItemImporter> importer
242 = getImporterCls(importerClass, DEFAULT_EAD_IMPORTER);
243
244
245
246 Actioner user = getCurrentActioner();
247 PermissionScope scope = manager.getEntity(scopeId, PermissionScope.class);
248
249
250 String message = getLogMessage(logMessage).orElse(null);
251 ImportManager importManager = new SaxImportManager(
252 graph, scope, user, importer, handler)
253 .allowUpdates(allowUpdates)
254 .setTolerant(tolerant)
255 .withProperties(propertyFile);
256 ImportLog log = importDataStream(importManager, message, data,
257 MediaType.APPLICATION_XML_TYPE, MediaType.TEXT_XML_TYPE);
258
259 if (commit) {
260 logger.debug("Committing EAD import transaction...");
261 tx.success();
262 }
263
264 return log;
265 }
266 }
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281 @POST
282 @Consumes({MediaType.TEXT_PLAIN, MediaType.APPLICATION_XML,
283 MediaType.TEXT_XML, MediaType.APPLICATION_OCTET_STREAM})
284 @Path("ead-sync")
285 public SyncLog syncEad(
286 @QueryParam(SCOPE_PARAM) String scopeId,
287 @QueryParam("fonds") String fonds,
288 @DefaultValue("false") @QueryParam(TOLERANT_PARAM) Boolean tolerant,
289 @DefaultValue("false") @QueryParam(ALLOW_UPDATES_PARAM) Boolean allowUpdates,
290 @QueryParam(LOG_PARAM) String logMessage,
291 @QueryParam(PROPERTIES_PARAM) String propertyFile,
292 @QueryParam(HANDLER_PARAM) String handlerClass,
293 @QueryParam(IMPORTER_PARAM) String importerClass,
294 @QueryParam("ex") Set<String> ex,
295 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit,
296 InputStream data)
297 throws ItemNotFound, ValidationError, IOException, DeserializationError {
298
299 try (final Tx tx = beginTx()) {
300 checkPropertyFile(propertyFile);
301 Class<? extends SaxXmlHandler> handler
302 = getHandlerCls(handlerClass, DEFAULT_EAD_HANDLER);
303 Class<? extends ItemImporter> importer
304 = getImporterCls(importerClass, DEFAULT_EAD_IMPORTER);
305
306 Actioner user = getCurrentActioner();
307 PermissionScope scope = manager.getEntity(scopeId, PermissionScope.class);
308 PermissionScope syncScope = fonds == null
309 ? scope
310 : manager.getEntity(fonds, PermissionScope.class);
311
312
313 String message = getLogMessage(logMessage).orElse(null);
314 SaxImportManager importManager = new SaxImportManager(
315 graph, scope, user, importer, handler)
316 .allowUpdates(allowUpdates)
317 .setTolerant(tolerant)
318 .withProperties(propertyFile);
319
320
321 EadSync syncManager = new EadSync(graph, api(), syncScope, user, importManager);
322 SyncLog log = syncManager.sync(m -> importDataStream(m, message, data,
323 MediaType.APPLICATION_XML_TYPE, MediaType.TEXT_XML_TYPE), ex, message);
324
325 if (commit) {
326 logger.debug("Committing EAD sync transaction...");
327 tx.success();
328 }
329 return log;
330 } catch (EadSync.EadSyncError e) {
331 throw new DeserializationError(e.getMessage());
332 }
333 }
334
335
336
337
338 @POST
339 @Consumes({MediaType.TEXT_PLAIN, MediaType.APPLICATION_XML,
340 MediaType.TEXT_XML, MediaType.APPLICATION_OCTET_STREAM})
341 @Path("eag")
342 public ImportLog importEag(
343 @QueryParam(SCOPE_PARAM) String scopeId,
344 @DefaultValue("false") @QueryParam(TOLERANT_PARAM) Boolean tolerant,
345 @DefaultValue("false") @QueryParam(ALLOW_UPDATES_PARAM) Boolean allowUpdates,
346 @QueryParam(LOG_PARAM) String logMessage,
347 @QueryParam(PROPERTIES_PARAM) String propertyFile,
348 @QueryParam(HANDLER_PARAM) String handlerClass,
349 @QueryParam(IMPORTER_PARAM) String importerClass,
350 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit,
351 InputStream data)
352 throws ItemNotFound, ValidationError, IOException, DeserializationError {
353 return importEad(scopeId, tolerant, allowUpdates, logMessage, propertyFile,
354 nameOrDefault(handlerClass, EagHandler.class.getName()),
355 nameOrDefault(importerClass, EagImporter.class.getName()),
356 commit, data);
357 }
358
359
360
361
362 @POST
363 @Consumes({MediaType.TEXT_PLAIN, MediaType.APPLICATION_XML,
364 MediaType.TEXT_XML, MediaType.APPLICATION_OCTET_STREAM})
365 @Path("eac")
366 public ImportLog importEac(
367 @QueryParam(SCOPE_PARAM) String scopeId,
368 @DefaultValue("false") @QueryParam(TOLERANT_PARAM) Boolean tolerant,
369 @DefaultValue("false") @QueryParam(ALLOW_UPDATES_PARAM) Boolean allowUpdates,
370 @QueryParam(LOG_PARAM) String logMessage,
371 @QueryParam(PROPERTIES_PARAM) String propertyFile,
372 @QueryParam(HANDLER_PARAM) String handlerClass,
373 @QueryParam(IMPORTER_PARAM) String importerClass,
374 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit,
375 InputStream data)
376 throws ItemNotFound, ValidationError, IOException, DeserializationError {
377 return importEad(scopeId, tolerant, allowUpdates, logMessage, propertyFile,
378 nameOrDefault(handlerClass, EacHandler.class.getName()),
379 nameOrDefault(importerClass, EacImporter.class.getName()),
380 commit, data);
381 }
382
383
384
385
386
387
388
389
390 @POST
391 @Consumes({MediaType.TEXT_PLAIN, CSV_MEDIA_TYPE,
392 MediaType.APPLICATION_OCTET_STREAM})
393 @Produces(MediaType.APPLICATION_JSON)
394 @Path("csv")
395 public ImportLog importCsv(
396 @QueryParam(SCOPE_PARAM) String scopeId,
397 @DefaultValue("false") @QueryParam(TOLERANT_PARAM) Boolean tolerant,
398 @DefaultValue("false") @QueryParam(ALLOW_UPDATES_PARAM) Boolean allowUpdates,
399 @QueryParam(LOG_PARAM) String logMessage,
400 @QueryParam(IMPORTER_PARAM) String importerClass,
401 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit,
402 InputStream data)
403 throws ItemNotFound, ValidationError, IOException, DeserializationError {
404 try (final Tx tx = beginTx()) {
405 Class<? extends ItemImporter> importer
406 = getImporterCls(importerClass, DEFAULT_EAD_IMPORTER);
407
408
409
410 Actioner user = getCurrentActioner();
411 PermissionScope scope = manager.getEntity(scopeId, PermissionScope.class);
412
413
414 String message = getLogMessage(logMessage).orElse(null);
415 ImportManager importManager = new CsvImportManager(
416 graph, scope, user, tolerant, allowUpdates, importer);
417 ImportLog log = importDataStream(importManager, message, data,
418 MediaType.valueOf(CSV_MEDIA_TYPE));
419 if (commit) {
420 logger.debug("Committing CSV import transaction...");
421 tx.success();
422 }
423 return log;
424 }
425 }
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445 @POST
446 @Consumes({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
447 @Produces(MediaType.APPLICATION_JSON)
448 @Path("links")
449 public ImportLog importLinks(
450 @DefaultValue("false") @QueryParam(TOLERANT_PARAM) Boolean tolerant,
451 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit,
452 Table table) throws DeserializationError, ItemNotFound {
453 try (final Tx tx = beginTx()) {
454 ImportLog log = new LinkImporter(graph, getCurrentActioner(), tolerant)
455 .importLinks(table, getLogMessage().orElse(null));
456 if (commit) {
457 logger.debug("Committing link import transaction...");
458 tx.success();
459 }
460 return log;
461 }
462 }
463
464
465
466 private ImportLog importDataStream(
467 ImportManager importManager, String message, InputStream data, MediaType... accepts)
468 throws DeserializationError, ValidationError {
469 MediaType mediaType = requestHeaders.getMediaType();
470 try {
471 if (MediaType.TEXT_PLAIN_TYPE.equals(mediaType)) {
472
473 List<String> paths = getFilePaths(IOUtils.toString(data, StandardCharsets.UTF_8));
474 return importManager.importFiles(paths, message);
475 } else if (Lists.newArrayList(accepts).contains(mediaType)) {
476 return importManager.importInputStream(data, message);
477 } else {
478 return importPotentiallyGZippedArchive(importManager, message, data);
479 }
480 } catch (EOFException e) {
481 throw new DeserializationError("EOF reading input data");
482 } catch (InputParseError | IOException e) {
483 throw new DeserializationError("ParseError: " + e.getMessage());
484 } catch (IllegalArgumentException | ArchiveException e) {
485 throw new DeserializationError(e.getMessage());
486 }
487 }
488
489 private ImportLog importPotentiallyGZippedArchive(
490 ImportManager importManager, String message, InputStream data)
491 throws IOException, ValidationError, ArchiveException, InputParseError {
492 try (BufferedInputStream bufStream = new BufferedInputStream(data)) {
493 bufStream.mark(0);
494 try (GZIPInputStream gzipStream = new GZIPInputStream(bufStream)) {
495 logger.debug("Importing gzipped archive stream...");
496 return importArchive(importManager, message, gzipStream);
497 } catch (java.util.zip.ZipException e) {
498
499 bufStream.reset();
500 logger.debug("Importing uncompressed archive stream...");
501 return importArchive(importManager, message, bufStream);
502 }
503 }
504 }
505
506 private ImportLog importArchive(ImportManager importManager, String message, InputStream data)
507 throws IOException, ValidationError, ArchiveException, InputParseError {
508 try (BufferedInputStream bis = new BufferedInputStream(data);
509 ArchiveInputStream archiveInputStream = new
510 ArchiveStreamFactory(StandardCharsets.UTF_8.displayName())
511 .createArchiveInputStream(bis)) {
512 return importManager.importArchive(archiveInputStream, message);
513 }
514 }
515
516 private static List<String> getFilePaths(String pathList) throws DeserializationError {
517 List<String> files = Lists.newArrayList();
518 for (String path : Splitter.on("\n").omitEmptyStrings().trimResults().split(pathList)) {
519 if (!Files.isRegularFile(Paths.get(path))) {
520 throw new DeserializationError("File specified in payload not found: " + path);
521 }
522 files.add(path);
523 }
524 return files;
525 }
526
527 private static void checkPropertyFile(String properties) throws DeserializationError {
528
529 if (properties != null) {
530 java.nio.file.Path file = Paths.get(properties);
531 if (!Files.isRegularFile(file)) {
532 throw new DeserializationError("Properties file '" + properties + "' " +
533 "either does not exist, or is not a file.");
534 }
535 }
536 }
537
538 @SuppressWarnings("unchecked")
539 private static Class<? extends SaxXmlHandler> getHandlerCls(String handlerName, String
540 defaultHandler)
541 throws DeserializationError {
542 String name = nameOrDefault(handlerName, defaultHandler);
543 try {
544 Class<?> handler = Class.forName(name);
545 if (!SaxXmlHandler.class.isAssignableFrom(handler)) {
546 throw new DeserializationError("Class '" + handlerName + "' is" +
547 " not an instance of " + SaxXmlHandler.class.getSimpleName());
548 }
549 return (Class<? extends SaxXmlHandler>) handler;
550 } catch (ClassNotFoundException e) {
551 throw new DeserializationError("Class not found: " + e.getMessage());
552 }
553 }
554
555 @SuppressWarnings("unchecked")
556 private static Class<? extends ItemImporter> getImporterCls(String importerName, String defaultImporter)
557 throws DeserializationError {
558 String name = nameOrDefault(importerName, defaultImporter);
559 try {
560 Class<?> importer = Class.forName(name);
561 if (!ItemImporter.class.isAssignableFrom(importer)) {
562 throw new DeserializationError("Class '" + importerName + "' is" +
563 " not an instance of " + ItemImporter.class.getSimpleName());
564 }
565 return (Class<? extends ItemImporter>) importer;
566 } catch (ClassNotFoundException e) {
567 throw new DeserializationError("Class not found: " + e.getMessage());
568 }
569 }
570
571 private Optional<String> getLogMessage(String logMessagePathOrText) throws IOException {
572 if (logMessagePathOrText == null || logMessagePathOrText.trim().isEmpty()) {
573 return getLogMessage();
574 } else {
575 java.nio.file.Path fileTest = Paths.get(logMessagePathOrText);
576 if (Files.isRegularFile(fileTest)) {
577 return Optional.of(new String(Files.readAllBytes(fileTest), Charsets.UTF_8));
578 } else {
579 return Optional.of(logMessagePathOrText);
580 }
581 }
582 }
583
584 private static String nameOrDefault(String name, String defaultName) {
585 return (name == null || name.trim().isEmpty()) ? defaultName : name;
586 }
587 }