1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package eu.ehri.extension;
21
22 import com.google.common.collect.Lists;
23 import com.google.common.collect.Ordering;
24 import com.tinkerpop.blueprints.CloseableIterable;
25 import com.tinkerpop.blueprints.Vertex;
26 import eu.ehri.extension.base.AbstractResource;
27 import eu.ehri.project.acl.ContentTypes;
28 import eu.ehri.project.core.Tx;
29 import eu.ehri.project.core.impl.Neo4jGraphManager;
30 import eu.ehri.project.definitions.Entities;
31 import eu.ehri.project.definitions.Ontology;
32 import eu.ehri.project.exceptions.DeserializationError;
33 import eu.ehri.project.exceptions.ItemNotFound;
34 import eu.ehri.project.exceptions.PermissionDenied;
35 import eu.ehri.project.exceptions.SerializationError;
36 import eu.ehri.project.exceptions.ValidationError;
37 import eu.ehri.project.exporters.cvoc.SchemaExporter;
38 import eu.ehri.project.models.AccessPointType;
39 import eu.ehri.project.models.DocumentaryUnit;
40 import eu.ehri.project.models.EntityClass;
41 import eu.ehri.project.models.Link;
42 import eu.ehri.project.models.Repository;
43 import eu.ehri.project.models.base.Accessible;
44 import eu.ehri.project.models.base.Actioner;
45 import eu.ehri.project.models.base.Described;
46 import eu.ehri.project.models.base.Description;
47 import eu.ehri.project.models.base.Linkable;
48 import eu.ehri.project.models.base.PermissionScope;
49 import eu.ehri.project.models.cvoc.Vocabulary;
50 import eu.ehri.project.persistence.Bundle;
51 import eu.ehri.project.persistence.Serializer;
52 import eu.ehri.project.tools.DbUpgrader1to2;
53 import eu.ehri.project.tools.FindReplace;
54 import eu.ehri.project.tools.IdRegenerator;
55 import eu.ehri.project.tools.Linker;
56 import eu.ehri.project.utils.Table;
57 import eu.ehri.project.utils.fixtures.FixtureLoaderFactory;
58 import org.neo4j.graphdb.GraphDatabaseService;
59
60 import javax.ws.rs.Consumes;
61 import javax.ws.rs.DefaultValue;
62 import javax.ws.rs.FormParam;
63 import javax.ws.rs.GET;
64 import javax.ws.rs.POST;
65 import javax.ws.rs.Path;
66 import javax.ws.rs.PathParam;
67 import javax.ws.rs.Produces;
68 import javax.ws.rs.QueryParam;
69 import javax.ws.rs.core.Context;
70 import javax.ws.rs.core.MediaType;
71 import javax.ws.rs.core.Response;
72 import javax.ws.rs.core.StreamingOutput;
73 import java.io.IOException;
74 import java.io.InputStream;
75 import java.util.Iterator;
76 import java.util.List;
77 import java.util.Set;
78 import java.util.concurrent.atomic.AtomicInteger;
79 import java.util.stream.Collectors;
80
81
82
83
84
85
86 @Path(ToolsResource.ENDPOINT)
87 public class ToolsResource extends AbstractResource {
88
89 private final Linker linker;
90
91 public static final String ENDPOINT = "tools";
92
93 private static final String SINGLE_PARAM = "single";
94 private static final String LANG_PARAM = "lang";
95 private static final String ACCESS_POINT_TYPE_PARAM = "apt";
96 private static final String DEFAULT_LANG = "eng";
97
/**
 * Builds the resource on top of the supplied database and creates the
 * {@link Linker} that works against this resource's graph.
 *
 * @param database the graph database service, supplied via {@code @Context}
 */
public ToolsResource(@Context GraphDatabaseService database) {
    super(database);
    this.linker = new Linker(graph);
}
102
103 @GET
104 @Produces(MediaType.TEXT_PLAIN)
105 @Path("version")
106 public String version() {
107 return getClass().getPackage().getImplementationVersion();
108 }
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126 @POST
127 @Path("find-replace")
128 @Consumes(MediaType.APPLICATION_FORM_URLENCODED)
129 @Produces({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
130 public Table findReplace(
131 final @FormParam("from") String from,
132 final @FormParam("to") String to,
133 final @QueryParam("type") String type,
134 final @QueryParam("subtype") String subType,
135 final @QueryParam("property") String property,
136 final @QueryParam("max") @DefaultValue("100") int maxItems,
137 final @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit) throws ValidationError {
138
139 try {
140 ContentTypes.withName(type);
141 } catch (IllegalArgumentException e) {
142 throw new IllegalArgumentException("Invalid entity type (must be a content type)");
143 }
144
145 try (final Tx tx = beginTx()) {
146 FindReplace fr = new FindReplace(graph, commit, maxItems);
147 List<List<String>> rows = fr.findAndReplace(EntityClass.withName(type),
148 EntityClass.withName(subType), property, from, to,
149 getCurrentActioner(), getLogMessage().orElse(null));
150
151 tx.success();
152 return Table.of(rows);
153 }
154 }
155
156
157
158
159
160
161
162
163 @GET
164 @Path("schema")
165 @Produces({TURTLE_MIMETYPE, RDF_XML_MIMETYPE, N3_MIMETYPE})
166 public Response exportSchema(
167 final @QueryParam("format") String format,
168 final @QueryParam("baseUri") String baseUri) throws IOException {
169 final String rdfFormat = getRdfFormat(format, "TTL");
170 final MediaType mediaType = MediaType.valueOf(RDF_MIMETYPE_FORMATS
171 .inverse().get(rdfFormat));
172 final SchemaExporter schemaExporter = new SchemaExporter(rdfFormat);
173 return Response.ok((StreamingOutput) outputStream ->
174 schemaExporter.dumpSchema(outputStream, baseUri))
175 .type(mediaType + "; charset=utf-8").build();
176 }
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192 @POST
193 @Produces(MediaType.APPLICATION_JSON)
194 @Path("generate-concepts/{repositoryId:[^/]+}/{vocabularyId:[^/]+}")
195 public long autoLinkRepositoryDocs(
196 @PathParam("repositoryId") String repositoryId,
197 @PathParam("vocabularyId") String vocabularyId,
198 @QueryParam(ACCESS_POINT_TYPE_PARAM) Set<AccessPointType> accessPointTypes,
199 @QueryParam(LANG_PARAM) @DefaultValue(DEFAULT_LANG) String languageCode,
200 @QueryParam(SINGLE_PARAM) @DefaultValue("true") boolean excludeSingle,
201 @QueryParam(TOLERANT_PARAM) @DefaultValue("false") boolean tolerant)
202 throws ItemNotFound, ValidationError,
203 PermissionDenied, DeserializationError {
204 try (final Tx tx = beginTx()) {
205 Actioner user = getCurrentActioner();
206 Repository repository = manager.getEntity(repositoryId, Repository.class);
207 Vocabulary vocabulary = manager.getEntity(vocabularyId, Vocabulary.class);
208
209 long linkCount = linker
210 .withAccessPointTypes(accessPointTypes)
211 .withTolerant(tolerant)
212 .withExcludeSingles(excludeSingle)
213 .withDefaultLanguage(languageCode)
214 .withLogMessage(getLogMessage())
215 .createAndLinkRepositoryVocabulary(repository, vocabulary, user);
216
217 tx.success();
218 return linkCount;
219 }
220 }
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242 @POST
243 @Produces({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
244 @Consumes({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
245 @Path("regenerate-ids")
246 public Table regenerateIds(
247 @QueryParam("id") List<String> ids,
248 @QueryParam("collisions") @DefaultValue("false") boolean collisions,
249 @QueryParam(TOLERANT_PARAM) @DefaultValue("false") boolean tolerant,
250 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit,
251 Table data)
252 throws ItemNotFound, IOException, IdRegenerator.IdCollisionError {
253 try (final Tx tx = beginTx()) {
254 List<String> allIds = Lists.newArrayList(ids);
255 data.rows().stream()
256 .filter(row -> row.size() == 1)
257 .forEach(row -> allIds.add(row.get(0)));
258
259 List<Accessible> items = allIds.stream().map(id -> {
260 try {
261 return manager.getEntity(id, Accessible.class);
262 } catch (ItemNotFound e) {
263 throw new RuntimeException(e);
264 }
265 }).collect(Collectors.toList());
266
267 List<List<String>> remap = new IdRegenerator(graph)
268 .withActualRename(commit)
269 .collisionMode(collisions)
270 .skippingCollisions(tolerant)
271 .reGenerateIds(items);
272 tx.success();
273 return Table.of(remap);
274 }
275 }
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297 @POST
298 @Produces({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
299 @Path("regenerate-ids-for-type/{type:[^/]+}")
300 public Table regenerateIdsForType(
301 @PathParam("type") String type,
302 @QueryParam("collisions") @DefaultValue("false") boolean collisions,
303 @QueryParam(TOLERANT_PARAM) @DefaultValue("false") boolean tolerant,
304 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit)
305 throws IOException, IdRegenerator.IdCollisionError {
306 try (final Tx tx = beginTx()) {
307 EntityClass entityClass = EntityClass.withName(type);
308 try (CloseableIterable<Accessible> frames = manager
309 .getEntities(entityClass, Accessible.class)) {
310 List<List<String>> lists = new IdRegenerator(graph)
311 .withActualRename(commit)
312 .collisionMode(collisions)
313 .skippingCollisions(tolerant)
314 .reGenerateIds(frames);
315 tx.success();
316 return Table.of(lists);
317 }
318 }
319 }
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341 @POST
342 @Produces({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
343 @Path("regenerate-ids-for-scope/{scope:[^/]+}")
344 public Table regenerateIdsForScope(
345 @PathParam("scope") String scopeId,
346 @QueryParam("collisions") @DefaultValue("false") boolean collisions,
347 @QueryParam(TOLERANT_PARAM) @DefaultValue("false") boolean tolerant,
348 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit)
349 throws IOException, ItemNotFound, IdRegenerator.IdCollisionError {
350 try (final Tx tx = beginTx()) {
351 PermissionScope scope = manager.getEntity(scopeId, PermissionScope.class);
352 List<List<String>> lists = new IdRegenerator(graph)
353 .withActualRename(commit)
354 .skippingCollisions(tolerant)
355 .collisionMode(collisions)
356 .reGenerateIds(scope.getAllContainedItems());
357 tx.success();
358 return Table.of(lists);
359 }
360 }
361
362
363
364
365 @POST
366 @Produces("text/plain")
367 @Path("regenerate-description-ids")
368 public String regenerateDescriptionIds(
369 @QueryParam("buffer") @DefaultValue("-1") int bufferSize,
370 @QueryParam(COMMIT_PARAM) @DefaultValue("false") boolean commit)
371 throws IOException, ItemNotFound, IdRegenerator.IdCollisionError {
372 EntityClass[] types = {EntityClass.DOCUMENTARY_UNIT_DESCRIPTION, EntityClass
373 .CVOC_CONCEPT_DESCRIPTION, EntityClass.HISTORICAL_AGENT_DESCRIPTION, EntityClass
374 .REPOSITORY_DESCRIPTION};
375 int done = 0;
376 try (final Tx tx = beginTx()) {
377 final Serializer depSerializer = new Serializer.Builder(graph).dependentOnly().build();
378 for (EntityClass entityClass : types) {
379 try (CloseableIterable<Description> descriptions = manager.getEntities(entityClass, Description.class)) {
380 for (Description desc : descriptions) {
381 Described entity = desc.getEntity();
382 if (entity != null) {
383 PermissionScope scope = entity.getPermissionScope();
384 List<String> idPath = scope != null
385 ? Lists.newArrayList(scope.idPath())
386 : Lists.newArrayList();
387 idPath.add(entity.getIdentifier());
388 Bundle descBundle = depSerializer.entityToBundle(desc);
389 String newId = entityClass.getIdGen().generateId(idPath, descBundle);
390 if (!newId.equals(desc.getId()) && commit) {
391 manager.renameVertex(desc.asVertex(), desc.getId(), newId);
392 done++;
393
394 if (bufferSize > 0 && done % bufferSize == 0) {
395 tx.success();
396 }
397 }
398 }
399 }
400 }
401 }
402 if (commit && done > 0) {
403 tx.success();
404 }
405 return String.valueOf(done);
406 } catch (SerializationError e) {
407 throw new RuntimeException(e);
408 }
409 }
410
411 @POST
412 @Produces("text/plain")
413 @Path("set-labels")
414 public String setLabels() throws IOException, ItemNotFound, IdRegenerator.IdCollisionError {
415 long done = 0;
416 try (final Tx tx = beginTx()) {
417 for (Vertex v : graph.getVertices()) {
418 try {
419 ((Neo4jGraphManager) manager).setLabels(v);
420 done++;
421 } catch (org.neo4j.graphdb.ConstraintViolationException e) {
422 logger.error("Error setting labels on {} ({})", manager.getId(v), v.getId());
423 e.printStackTrace();
424 }
425
426 if (done % 100000 == 0) {
427 graph.getBaseGraph().commit();
428 }
429 }
430 tx.success();
431 }
432
433 return String.valueOf(done);
434 }
435
436 @POST
437 @Produces("text/plain")
438 @Path("set-constraints")
439 public void setConstraints() {
440 try (final Tx tx = beginTx()) {
441 logger.info("Initializing graph schema...");
442 manager.initialize();
443 tx.success();
444 }
445 }
446
447 @POST
448 @Produces("text/plain")
449 @Path("upgrade-1to2")
450 public String upgradeDb1to2() throws IOException {
451 final AtomicInteger done = new AtomicInteger();
452 try (final Tx tx = beginTx()) {
453 logger.info("Upgrading DB schema...");
454 DbUpgrader1to2 upgrader1to2 = new DbUpgrader1to2(graph, () -> {
455 if (done.getAndIncrement() % 100000 == 0) {
456 graph.getBaseGraph().commit();
457 }
458 });
459 upgrader1to2
460 .upgradeIdAndTypeKeys()
461 .upgradeTypeValues()
462 .setIdAndTypeOnEventLinks();
463 tx.success();
464 logger.info("Changed {} items", done.get());
465 return String.valueOf(done.get());
466 }
467 }
468
469 @POST
470 @Produces("text/plain")
471 @Path("full-upgrade-1to2")
472 public void fullUpgradeDb1to2()
473 throws IOException, IdRegenerator.IdCollisionError, ItemNotFound {
474 upgradeDb1to2();
475 setLabels();
476 setConstraints();
477 try (Tx tx = beginTx()) {
478 new DbUpgrader1to2(graph, () -> {
479 }).setDbSchemaVersion();
480 tx.success();
481 }
482 }
483
484
485
486
487
488
489
490
491
492
493 @POST
494 @Consumes({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
495 @Produces({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
496 @Path("relink-targets")
497 public Table relink(Table mapping) throws DeserializationError {
498 try (final Tx tx = beginTx()) {
499 List<List<String>> done = Lists.newArrayList();
500 for (List<String> row : mapping.rows()) {
501 if (row.size() != 2) {
502 throw new DeserializationError(
503 "Invalid table data: must contain 2 columns only");
504 }
505 String fromId = row.get(0);
506 String toId = row.get(1);
507 Linkable from = manager.getEntity(fromId, Linkable.class);
508 Linkable to = manager.getEntity(toId, Linkable.class);
509 int relinked = 0;
510 for (Link link : from.getLinks()) {
511 link.addLinkTarget(to);
512 link.removeLinkTarget(from);
513 relinked++;
514 }
515 done.add(Lists.newArrayList(fromId, toId, String.valueOf(relinked)));
516 }
517
518 tx.success();
519 return Table.of(done);
520 } catch (ItemNotFound e) {
521 throw new DeserializationError("Unable to locate item with ID: " + e.getValue());
522 }
523 }
524
525
526
527
528
529
530
531
532
533
534 @POST
535 @Consumes({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
536 @Produces({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
537 @Path("reparent")
538 public Table reparent(@QueryParam("commit") @DefaultValue("false") boolean commit, Table mapping)
539 throws DeserializationError {
540 try (final Tx tx = beginTx()) {
541 IdRegenerator idRegenerator = new IdRegenerator(graph).withActualRename(commit);
542 List<List<String>> done = Lists.newArrayList();
543 for (List<String> row : mapping.rows()) {
544 if (row.size() != 2) {
545 throw new DeserializationError(
546 "Invalid table data: must contain 2 columns only");
547 }
548 String id = row.get(0);
549 String newParentId = row.get(1);
550 DocumentaryUnit item = manager
551 .getEntity(id, EntityClass.DOCUMENTARY_UNIT, DocumentaryUnit.class);
552 PermissionScope parent = manager.getEntity(newParentId, PermissionScope.class);
553 item.setPermissionScope(parent);
554 if (Entities.DOCUMENTARY_UNIT.equals(parent.getType())) {
555 parent.as(DocumentaryUnit.class).addChild(item);
556 } else if (Entities.REPOSITORY.equals(parent.getType())) {
557 item.setRepository(parent.as(Repository.class));
558 } else {
559 throw new DeserializationError(String.format(
560 "Unsupported parent type for ID '%s': %s",
561 newParentId, parent.getType()));
562 }
563 try {
564 idRegenerator.reGenerateId(item).ifPresent(done::add);
565 } catch (IdRegenerator.IdCollisionError e) {
566 throw new DeserializationError(String.format(
567 "%s. Ensure they do not share the same local identifier: '%s'",
568 e.getMessage(), item.getIdentifier()));
569 }
570 }
571
572 if (commit) {
573 tx.success();
574 }
575 return Table.of(done);
576 } catch (ItemNotFound e) {
577 throw new DeserializationError("Unable to locate item with ID: " + e.getValue());
578 }
579 }
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594 @POST
595 @Consumes({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
596 @Produces({MediaType.APPLICATION_JSON, CSV_MEDIA_TYPE})
597 @Path("rename")
598 public Table rename(Table mapping)
599 throws IdRegenerator.IdCollisionError, DeserializationError {
600 try (final Tx tx = beginTx()) {
601 IdRegenerator idRegenerator = new IdRegenerator(graph).withActualRename(true);
602
603
604
605 List<List<String>> sorted = Ordering.usingToString().sortedCopy(mapping.rows());
606
607 List<List<String>> done = Lists.newArrayList();
608 for (List<String> row : sorted) {
609 if (row.size() != 2) {
610 throw new DeserializationError(
611 "Invalid table data: must contain 2 columns only");
612 }
613 String currentId = row.get(0);
614 String newLocalIdentifier = row.get(1);
615 Accessible item = manager.getEntity(currentId, Accessible.class);
616 item.asVertex().setProperty(Ontology.IDENTIFIER_KEY, newLocalIdentifier);
617 idRegenerator.reGenerateId(item).ifPresent(done::add);
618 }
619
620 tx.success();
621 return Table.of(done);
622 } catch (ItemNotFound e) {
623 throw new DeserializationError("Unable to locate item with ID: " + e.getValue());
624 }
625 }
626
627
628
629
630
631
632 @POST
633 @Path("__INITIALISE")
634 public void initialize(
635 @QueryParam("yes-i-am-sure") @DefaultValue("false") boolean confirm,
636 InputStream fixtures) throws Exception {
637 try (final Tx tx = beginTx()) {
638 sanityCheck(confirm);
639
640 for (Vertex v : graph.getVertices()) {
641 v.remove();
642 }
643 tx.success();
644 }
645 setConstraints();
646 try (final Tx tx = beginTx()) {
647 FixtureLoaderFactory.getInstance(graph, true)
648 .loadTestData(fixtures);
649 tx.success();
650 }
651 }
652
653 private void sanityCheck(boolean confirm) {
654
655 Iterator<Vertex> counter = graph.getVertices().iterator();
656 int c = 0;
657 while (counter.hasNext()) {
658 counter.next();
659 c++;
660 if (c > 500) {
661 if (!confirm) {
662 throw new RuntimeException("This database has more than 500 nodes. " +
663 "Refusing to clear it without confirmation!");
664 } else break;
665 }
666 }
667 }
668 }