1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package eu.ehri.project.importers.cvoc;
21
22 import com.google.common.base.Splitter;
23 import com.google.common.collect.Lists;
24 import com.google.common.collect.Maps;
25 import com.google.common.collect.Sets;
26 import com.tinkerpop.frames.FramedGraph;
27 import com.typesafe.config.Config;
28 import com.typesafe.config.ConfigFactory;
29 import eu.ehri.project.api.Api;
30 import eu.ehri.project.api.ApiFactory;
31 import eu.ehri.project.core.GraphManager;
32 import eu.ehri.project.core.GraphManagerFactory;
33 import eu.ehri.project.definitions.EventTypes;
34 import eu.ehri.project.definitions.Ontology;
35 import eu.ehri.project.definitions.SkosMultilingual;
36 import eu.ehri.project.exceptions.DeserializationError;
37 import eu.ehri.project.exceptions.ItemNotFound;
38 import eu.ehri.project.exceptions.PermissionDenied;
39 import eu.ehri.project.exceptions.SerializationError;
40 import eu.ehri.project.exceptions.ValidationError;
41 import eu.ehri.project.importers.ImportLog;
42 import eu.ehri.project.models.EntityClass;
43 import eu.ehri.project.models.Link;
44 import eu.ehri.project.models.base.Accessor;
45 import eu.ehri.project.models.base.Actioner;
46 import eu.ehri.project.models.base.Described;
47 import eu.ehri.project.models.base.Linkable;
48 import eu.ehri.project.models.cvoc.AuthoritativeItem;
49 import eu.ehri.project.models.cvoc.AuthoritativeSet;
50 import eu.ehri.project.models.cvoc.Concept;
51 import eu.ehri.project.models.cvoc.Vocabulary;
52 import eu.ehri.project.persistence.ActionManager;
53 import eu.ehri.project.persistence.Bundle;
54 import eu.ehri.project.persistence.BundleManager;
55 import eu.ehri.project.persistence.Mutation;
56 import eu.ehri.project.persistence.Serializer;
57 import eu.ehri.project.utils.LanguageHelpers;
58 import org.apache.jena.ontology.OntClass;
59 import org.apache.jena.ontology.OntModel;
60 import org.apache.jena.ontology.OntModelSpec;
61 import org.apache.jena.ontology.OntResource;
62 import org.apache.jena.rdf.model.ModelFactory;
63 import org.apache.jena.rdf.model.RDFNode;
64 import org.apache.jena.rdf.model.Resource;
65 import org.apache.jena.rdf.model.Statement;
66 import org.apache.jena.util.iterator.ExtendedIterator;
67 import org.apache.jena.vocabulary.OWL;
68 import org.apache.jena.vocabulary.RDF;
69 import org.apache.jena.vocabulary.RDFS;
70 import org.apache.jena.vocabulary.SKOS;
71 import org.apache.jena.vocabulary.SKOSXL;
72 import org.slf4j.Logger;
73 import org.slf4j.LoggerFactory;
74
75 import java.io.IOException;
76 import java.io.InputStream;
77 import java.net.URI;
78 import java.nio.file.Files;
79 import java.nio.file.Paths;
80 import java.util.Collections;
81 import java.util.List;
82 import java.util.Locale;
83 import java.util.Map;
84 import java.util.Objects;
85 import java.util.Optional;
86 import java.util.Set;
87 import java.util.function.BiConsumer;
88 import java.util.function.Function;
89 import java.util.stream.Collectors;
90 import java.util.stream.Stream;
91
92
93
94
95 public final class JenaSkosImporter implements SkosImporter {
96
97 private static final Logger logger = LoggerFactory.getLogger(JenaSkosImporter.class);
98 private static final Config config = ConfigFactory.load();
99 private static final Splitter codeSplitter = Splitter.on('-')
100 .omitEmptyStrings().trimResults().limit(2);
101 private final FramedGraph<?> framedGraph;
102 private final Actioner actioner;
103 private final Vocabulary vocabulary;
104 private final BundleManager dao;
105 private final Api api;
106 private final Serializer mergeSerializer;
107 private final boolean tolerant;
108 private final String format;
109 private final String baseURI;
110 private final String suffix;
111 private final String defaultLang;
112 private static final String DEFAULT_LANG = Locale.ENGLISH.getISO3Language();
113 private static final Bundle linkTemplate = Bundle.of(EntityClass.LINK)
114 .withDataValue(Ontology.LINK_HAS_DESCRIPTION, config.getString("io.import.defaultLinkText"))
115 .withDataValue(Ontology.LINK_HAS_TYPE, config.getString("io.import.defaultLinkType"));
116
117
118
119
120
121
122
123
124
125
126
/**
 * Create an importer with every option given explicitly.
 *
 * @param framedGraph the graph to import into
 * @param actioner    the user performing the import
 * @param vocabulary  the vocabulary receiving the concepts
 * @param tolerant    whether validation errors are recorded rather than thrown
 * @param baseURI     URI prefix removed when deriving identifiers, or null
 * @param suffix      URI suffix removed when deriving identifiers, or null
 * @param format      RDF format name passed to the reader, or null
 * @param defaultLang language code used for labels without a language tag
 */
public JenaSkosImporter(FramedGraph<?> framedGraph, Actioner actioner,
        Vocabulary vocabulary, boolean tolerant, String baseURI, String suffix, String format, String defaultLang) {
    // Plain option values.
    this.tolerant = tolerant;
    this.format = format;
    this.baseURI = baseURI;
    this.suffix = suffix;
    this.defaultLang = defaultLang;

    // Graph context.
    this.framedGraph = framedGraph;
    this.actioner = actioner;
    this.vocabulary = vocabulary;

    // Collaborators derived from the graph context.
    this.api = ApiFactory.noLogging(framedGraph, actioner.as(Accessor.class));
    this.mergeSerializer = new Serializer.Builder(framedGraph).dependentOnly().build();
    this.dao = new BundleManager(framedGraph, vocabulary.idPath());
}
141
142 private static class StringValue {
143 private final String str;
144 private final String lang;
145
146 StringValue(String str, String lang) {
147 this.str = str;
148 this.lang = lang;
149 }
150
151 String getValue() {
152 return str;
153 }
154
155 String getLang() {
156 return lang;
157 }
158
159 @Override
160 public String toString() {
161 return "\"" + str + "\"" + (
162 lang == null || lang.trim().isEmpty() ? "" : "@" + lang);
163 }
164 }
165
166
167
168
169
170
171
172
/**
 * Create an importer with default options: not tolerant, no base URI,
 * no URI suffix, no explicit format, and the default language.
 *
 * @param framedGraph the graph to import into
 * @param actioner    the user performing the import
 * @param vocabulary  the vocabulary receiving the concepts
 */
public JenaSkosImporter(FramedGraph<?> framedGraph, Actioner actioner,
        Vocabulary vocabulary) {
    this(
            framedGraph,
            actioner,
            vocabulary,
            false,          // tolerant
            null,           // baseURI
            null,           // suffix
            null,           // format
            DEFAULT_LANG    // defaultLang
    );
}
177
178 @Override
179 public JenaSkosImporter setTolerant(boolean tolerant) {
180 logger.debug("Setting importer to tolerant: {}", tolerant);
181 return new JenaSkosImporter(
182 framedGraph, actioner, vocabulary, tolerant, baseURI, suffix, format, defaultLang);
183 }
184
185 @Override
186 public JenaSkosImporter setBaseURI(String prefix) {
187 logger.debug("Setting importer base URI: {}", prefix);
188 return new JenaSkosImporter(
189 framedGraph, actioner, vocabulary, tolerant, prefix, suffix, format, defaultLang);
190 }
191
192 @Override
193 public JenaSkosImporter setURISuffix(String suffix) {
194 logger.debug("Setting importer URI: suffix {}", suffix);
195 return new JenaSkosImporter(
196 framedGraph, actioner, vocabulary, tolerant, baseURI, suffix, format, defaultLang);
197 }
198
199 @Override
200 public JenaSkosImporter setFormat(String format) {
201 logger.debug("Setting importer format: {}", format);
202 return new JenaSkosImporter(
203 framedGraph, actioner, vocabulary, tolerant, baseURI, suffix, format, defaultLang);
204 }
205
206 @Override
207 public JenaSkosImporter setDefaultLang(String lang) {
208 logger.debug("Setting importer default language: {}", lang);
209 return new JenaSkosImporter(
210 framedGraph, actioner, vocabulary, tolerant, baseURI, suffix, format,
211 LanguageHelpers.iso639DashTwoCode(lang));
212 }
213
214
215
216
217
218
219
220
221 @Override
222 public ImportLog importFile(String filePath, String logMessage)
223 throws IOException, ValidationError {
224 try (InputStream ios = Files.newInputStream(Paths.get(filePath))) {
225 return importFile(ios, logMessage);
226 }
227 }
228
229
230
231
232
233
234
235
236 @Override
237 public ImportLog importFile(InputStream ios, String logMessage) throws IOException, ValidationError {
238
239
240 Optional<String> logMsg = getLogMessage(logMessage);
241 ActionManager.EventContext eventContext = new ActionManager(framedGraph, vocabulary).newEventContext(
242 actioner, EventTypes.ingest, logMsg);
243
244 ImportLog log = new ImportLog(logMsg.orElse(null));
245
246
247
248
249 OntModel model = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM_RULE_INF);
250 try {
251 model.add(SKOSXL.prefLabel, RDFS.subPropertyOf, SKOS.prefLabel);
252 model.add(SKOSXL.altLabel, RDFS.subPropertyOf, SKOS.altLabel);
253 model.add(SKOSXL.hiddenLabel, RDFS.subPropertyOf, SKOS.hiddenLabel);
254
255
256
257 model.add(SKOS.broader, OWL.inverseOf, SKOS.narrower);
258
259 model.read(ios, null, format);
260 OntClass conceptClass = model.getOntClass(SkosRDFVocabulary.CONCEPT.getURI().toString());
261 logger.debug("in import file: {}", SkosRDFVocabulary.CONCEPT.getURI());
262 Map<Resource, Concept> imported = Maps.newHashMap();
263
264 ExtendedIterator<? extends OntResource> itemIterator = conceptClass.listInstances();
265 try {
266 while (itemIterator.hasNext()) {
267 Resource item = itemIterator.next();
268
269 try {
270 Mutation<Concept> graphConcept = importConcept(item);
271 imported.put(item, graphConcept.getNode());
272
273 switch (graphConcept.getState()) {
274 case UNCHANGED:
275 log.addUnchanged();
276 break;
277 case CREATED:
278 log.addCreated();
279 eventContext.addSubjects(graphConcept.getNode());
280 break;
281 case UPDATED:
282 log.addUpdated();
283 eventContext.addSubjects(graphConcept.getNode());
284 break;
285 }
286 } catch (ValidationError validationError) {
287 if (tolerant) {
288 logger.error(validationError.getMessage());
289 log.addError(item.toString(), validationError.getMessage());
290 } else {
291 throw validationError;
292 }
293 }
294 }
295 } finally {
296 itemIterator.close();
297 }
298
299 for (Map.Entry<Resource, Concept> pair : imported.entrySet()) {
300 hookupRelationships(pair.getKey(), pair.getValue(), imported);
301 }
302
303 for (Concept concept : imported.values()) {
304 vocabulary.addItem(concept);
305 concept.setPermissionScope(vocabulary);
306 }
307
308 if (log.hasDoneWork()) {
309 eventContext.commit();
310 }
311
312 return log;
313 } finally {
314 model.close();
315 }
316 }
317
318 private Mutation<Concept> importConcept(Resource item) throws ValidationError {
319 logger.debug("Importing: {}", item);
320 Bundle.Builder builder = Bundle.Builder.withClass(EntityClass.CVOC_CONCEPT)
321 .addDataValue(Ontology.IDENTIFIER_KEY, getId(URI.create(item.getURI())))
322 .addDataValue(Ontology.URI_KEY, item.getURI());
323
324 for (Map.Entry<String, URI> prop : SkosRDFVocabulary.GENERAL_PROPS.entrySet()) {
325 for (RDFNode target : getObjectWithPredicate(item, prop.getValue())) {
326 if (target.isLiteral()) {
327 if (prop.getKey().equals("latitude/longitude")) {
328 String[] latLon = target.asLiteral().getString().split(",");
329 if (latLon.length > 1) {
330 builder.addDataValue("latitude", latLon[0]);
331 builder.addDataValue("longitude", latLon[1]);
332 }
333 } else {
334 builder.addDataMultiValue(prop.getKey(), target.asLiteral().getString());
335 }
336 } else {
337 builder.addDataMultiValue(prop.getKey(), target.toString());
338 }
339 }
340 }
341
342 Map<AuthoritativeItem, String> linkedConcepts = Maps.newHashMap();
343
344 List<Bundle> unknown = getAdditionalRelations(item, linkedConcepts);
345 for (Bundle description : getDescriptions(item)) {
346 Bundle withRels = description
347 .withRelations(Ontology.HAS_UNKNOWN_PROPERTY, unknown);
348 builder.addRelation(Ontology.DESCRIPTION_FOR_ENTITY, withRels);
349 }
350
351 Mutation<Concept> mut = dao.createOrUpdate(builder.build(), Concept.class);
352 createLinks(mut.getNode(), linkedConcepts);
353 return mut;
354 }
355
356 private void createLinks(Concept unit, Map<AuthoritativeItem, String> linkedConcepts) {
357 for (AuthoritativeItem concept : linkedConcepts.keySet()) {
358 try {
359 String relType = linkedConcepts.get(concept);
360 String typeKey = relType.substring(0, relType.indexOf(":"));
361 String typeValue = relType.substring(relType.indexOf(":") + 1);
362 Bundle data = linkTemplate.withDataValue(typeKey, typeValue);
363 Optional<Link> existing = findLink(unit, concept, data);
364 if (!existing.isPresent()) {
365 Link link = api.create(data, Link.class);
366 unit.addLink(link);
367 concept.addLink(link);
368 }
369 } catch (ValidationError | PermissionDenied | DeserializationError | SerializationError ex) {
370 logger.error("Unexpected error creating relationship link", ex);
371 }
372 }
373 }
374
375 private Optional<Link> findLink(Described unit, Linkable target, Bundle data) throws SerializationError {
376 for (Link link : unit.getLinks()) {
377 for (Linkable connected : link.getLinkTargets()) {
378 if (target.equals(connected)
379 && mergeSerializer.entityToBundle(link).equals(data)) {
380 return Optional.of(link);
381 }
382 }
383 }
384 return Optional.empty();
385 }
386
387 private List<Bundle> getAdditionalRelations(Resource item, Map<AuthoritativeItem, String> linkedItems) {
388 List<Bundle> unknown = Lists.newArrayList();
389
390 for (Map.Entry<String, URI> rel : SkosRDFVocabulary.RELATION_PROPS.entrySet()) {
391 for (RDFNode annotation : getObjectWithPredicate(item, rel.getValue())) {
392 if (annotation.isLiteral()) {
393 unknown.add(Bundle.Builder.withClass(EntityClass.UNKNOWN_PROPERTY)
394 .addDataValue(rel.getKey(), annotation.toString())
395 .build());
396 } else {
397 if (rel.getKey().startsWith("skos:") || rel.getKey().startsWith("sem:")) {
398 Optional<AuthoritativeItem> found = findRelatedConcept(annotation.toString());
399 if (found.isPresent()) {
400 linkedItems.put(found.get(), rel.getKey());
401 } else {
402 unknown.add(Bundle.Builder.withClass(EntityClass.UNKNOWN_PROPERTY)
403 .addDataValue(rel.getKey(), annotation.toString())
404 .build());
405 }
406 }
407 }
408 }
409 }
410 return unknown;
411 }
412
413 private Optional<AuthoritativeItem> findRelatedConcept(String name) {
414 if (name != null) {
415 String[] domains = name.split("/");
416 if (domains.length > 2) {
417 String cvocId = domains[domains.length - 2];
418 String conceptId = domains[domains.length - 1];
419 AuthoritativeSet referredSet;
420 try {
421 GraphManager manager = GraphManagerFactory.getInstance(framedGraph);
422 referredSet = manager.getEntity(cvocId, AuthoritativeSet.class);
423 for (AuthoritativeItem authItem : referredSet.getAuthoritativeItems()) {
424 if (authItem.getIdentifier().equals(conceptId)) {
425 return Optional.of(authItem);
426 }
427 }
428 } catch (ItemNotFound ex) {
429 logger.error("AuthoritativeSet with id {} not found: {}", cvocId, ex.getMessage());
430 }
431 }
432 }
433 return Optional.empty();
434 }
435
436 private List<RDFNode> getObjectWithPredicate(Resource item, final URI propUri) {
437
438
439 return item.listProperties().filterKeep(statement ->
440 statement.getPredicate()
441 .hasURI(propUri.toString()))
442 .mapWith(Statement::getObject).toList();
443 }
444
445 private void connectRelation(Concept current, Resource item, Map<Resource, Concept> others,
446 URI propUri, Function<Concept, Iterable<Concept>> getter,
447 BiConsumer<Concept, Concept> addFunc, BiConsumer<Concept, Concept> dropFunc) {
448 Set<Concept> existingRelations = Sets.newHashSet(getter.apply(current));
449 Set<Concept> newRelations = getObjectWithPredicate(item, propUri)
450 .stream()
451 .filter(RDFNode::isResource)
452 .map(n -> others.get(n.asResource()))
453 .filter(Objects::nonNull)
454 .collect(Collectors.toSet());
455 if (!existingRelations.equals(newRelations)) {
456 logger.debug("Updating relations for concept: {}: {} -> {} => {}",
457 propUri, current.getId(), existingRelations, newRelations);
458
459 Sets.difference(existingRelations, newRelations)
460 .forEach(e -> dropFunc.accept(current, e));
461 Sets.difference(newRelations, existingRelations)
462 .forEach(n -> addFunc.accept(current, n));
463 }
464 }
465
466 private void hookupRelationships(Resource item, Concept current, Map<Resource, Concept> conceptMap) {
467 connectRelation(current, item, conceptMap, SkosRDFVocabulary.BROADER.getURI(),
468 Concept::getBroaderConcepts, Concept::addBroaderConcept, Concept::removeBroaderConcept);
469 connectRelation(current, item, conceptMap, SkosRDFVocabulary.NARROWER.getURI(),
470 Concept::getNarrowerConcepts, Concept::addNarrowerConcept, Concept::removeNarrowerConcept);
471 connectRelation(current, item, conceptMap, SkosRDFVocabulary.RELATED.getURI(),
472 Concept::getRelatedConcepts, Concept::addRelatedConcept, Concept::removeRelatedConcept);
473 }
474
475 private List<StringValue> getReifiedObjectValue(Resource item, URI propUri) {
476 List<StringValue> values = Lists.newArrayList();
477 for (RDFNode node : getObjectWithPredicate(item, propUri)) {
478 if (node.isLiteral()) {
479 values.add(new StringValue(node.asLiteral().getString(), node.asLiteral().getLanguage()));
480 } else {
481 Stream.of(SKOSXL.literalForm, RDF.value)
482 .map(prop -> node.asResource().getProperty(prop))
483 .filter(Objects::nonNull)
484 .map(Statement::getObject)
485 .filter(Objects::nonNull)
486 .findFirst()
487 .ifPresent(object -> values.add(new StringValue(
488 object.asLiteral().getString(), object.asLiteral().getLanguage())));
489 }
490 }
491 return values;
492 }
493
494 private List<Bundle> getDescriptions(Resource item) {
495 List<Bundle> descriptions = Lists.newArrayList();
496
497 for (StringValue property : getReifiedObjectValue(item, SkosRDFVocabulary.PREF_LABEL.getURI())) {
498
499 Bundle.Builder builder = Bundle.Builder.withClass(EntityClass.CVOC_CONCEPT_DESCRIPTION);
500
501 String langCode2Letter = property.getLang();
502 String langCode3Letter = getLanguageCode(langCode2Letter, defaultLang);
503 Optional<String> descCode = getScriptCode(langCode2Letter);
504
505 builder.addDataValue(Ontology.NAME_KEY, property.getValue())
506 .addDataValue(Ontology.LANGUAGE, langCode3Letter);
507 descCode.ifPresent(code -> builder.addDataValue(Ontology.IDENTIFIER_KEY, code));
508
509 for (Map.Entry<String, List<URI>> prop : SkosRDFVocabulary.LANGUAGE_PROPS.entrySet()) {
510 List<String> values = Lists.newArrayList();
511 for (URI uri : prop.getValue()) {
512 for (StringValue target : getReifiedObjectValue(item, uri)) {
513 String propLang2Letter = target.getLang();
514 String propLanguageCode = getLanguageCode(propLang2Letter, defaultLang);
515 Optional<String> propDescCode = getScriptCode(propLang2Letter);
516 if (propLanguageCode.equals(langCode3Letter) && propDescCode.equals(descCode)) {
517 values.add(target.getValue());
518 }
519 }
520 }
521 if (!values.isEmpty()) {
522
523
524
525
526 Collections.sort(values);
527 builder.addDataValue(prop.getKey(), values);
528 }
529 }
530 Bundle bundle = builder.build();
531 logger.trace(bundle.toJson());
532 descriptions.add(bundle);
533 }
534
535
536
537
538 if (descriptions.size() == 1) {
539 List<String> all = getReifiedObjectValue(item, SkosRDFVocabulary.ALT_LABEL.getURI())
540 .stream()
541 .map(StringValue::getValue)
542 .distinct()
543 .sorted()
544 .collect(Collectors.toList());
545 if (!all.isEmpty()) {
546 descriptions.set(0, descriptions.get(0).withDataValue(SkosMultilingual.altLabel.toString(), all));
547 }
548 }
549
550 return descriptions;
551 }
552
553 private static String getLanguageCode(String langCode2Letter, String defaultLang) {
554 if (langCode2Letter == null || langCode2Letter.trim().isEmpty()) {
555 return defaultLang;
556 }
557 List<String> parts = codeSplitter.splitToList(langCode2Letter);
558 if (parts.isEmpty()) {
559 return defaultLang;
560 } else {
561 return LanguageHelpers.iso639DashTwoCode(parts.get(0));
562 }
563 }
564
565 private static Optional<String> getScriptCode(String langCode) {
566 List<String> parts = codeSplitter.splitToList(langCode);
567 return parts.size() > 1 ? Optional.of(parts.get(1)) : Optional.empty();
568 }
569
570 private String getId(URI uri) {
571 if (baseURI != null && suffix != null && uri.toString().startsWith(baseURI)) {
572 String sub = uri.toString().substring(baseURI.length());
573 return sub.substring(0, sub.lastIndexOf(suffix));
574 } else if (baseURI != null && uri.toString().startsWith(baseURI)) {
575 return uri.toString().substring(baseURI.length());
576 } else if (uri.getFragment() != null) {
577 return uri.getFragment();
578 } else {
579 return uri.getPath().substring(uri.getPath().lastIndexOf("/") + 1)
580 + (uri.getQuery() != null ? uri.getQuery() : "")
581 + (uri.getFragment() != null ? uri.getFragment() : "");
582 }
583 }
584
585 private Optional<String> getLogMessage(String msg) {
586 return msg.trim().isEmpty() ? Optional.empty() : Optional.of(msg);
587 }
588 }