1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package eu.ehri.project.tools;
21
22 import com.google.common.collect.Maps;
23 import com.google.common.collect.Sets;
24 import com.tinkerpop.frames.FramedGraph;
25 import eu.ehri.project.api.Api;
26 import eu.ehri.project.api.ApiFactory;
27 import eu.ehri.project.definitions.EventTypes;
28 import eu.ehri.project.definitions.Ontology;
29 import eu.ehri.project.exceptions.DeserializationError;
30 import eu.ehri.project.exceptions.PermissionDenied;
31 import eu.ehri.project.exceptions.ValidationError;
32 import eu.ehri.project.models.AccessPoint;
33 import eu.ehri.project.models.AccessPointType;
34 import eu.ehri.project.models.DocumentaryUnit;
35 import eu.ehri.project.models.DocumentaryUnitDescription;
36 import eu.ehri.project.models.EntityClass;
37 import eu.ehri.project.models.Link;
38 import eu.ehri.project.models.Repository;
39 import eu.ehri.project.models.base.Accessor;
40 import eu.ehri.project.models.base.Actioner;
41 import eu.ehri.project.models.cvoc.Concept;
42 import eu.ehri.project.models.cvoc.Vocabulary;
43 import eu.ehri.project.persistence.ActionManager;
44 import eu.ehri.project.persistence.Bundle;
45 import eu.ehri.project.utils.Slugify;
46 import org.slf4j.Logger;
47 import org.slf4j.LoggerFactory;
48
49 import java.util.Map;
50 import java.util.Optional;
51 import java.util.Set;
52
53 import static com.google.common.base.Preconditions.checkNotNull;
54
55
56
57
58
59
60
61 public class Linker {
62
63 private static final Logger logger = LoggerFactory.getLogger(Linker.class);
64
65 private static final String LINK_TYPE = "associative";
66 private static final String DEFAULT_LANG = "eng";
67
68 private final FramedGraph<?> graph;
69 private final boolean tolerant;
70 private final boolean excludeSingles;
71 private final Set<AccessPointType> accessPointTypes;
72 private final String defaultLanguageCode;
73 private final Optional<String> logMessage;
74
75 private Linker(FramedGraph<?> graph, Set<AccessPointType> accessPointTypes,
76 String defaultLanguageCode, Optional<String> logMessage,
77 boolean tolerant, boolean excludeSingles) {
78 this.graph = graph;
79 this.accessPointTypes = accessPointTypes;
80 this.defaultLanguageCode = defaultLanguageCode;
81 this.tolerant = tolerant;
82 this.excludeSingles = excludeSingles;
83 this.logMessage = logMessage;
84 }
85
86 public Linker(FramedGraph<?> graph) {
87 this(graph, Sets.newHashSet(),
88 DEFAULT_LANG, Optional.empty(), false, true);
89 }
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111 public int createAndLinkRepositoryVocabulary(
112 Repository repository,
113 Vocabulary vocabulary,
114 Actioner user)
115 throws ValidationError, PermissionDenied {
116
117
118 Map<String, String> conceptIdentifierNames = Maps.newHashMap();
119 Map<String, Optional<Concept>> identifierConcept = Maps.newHashMap();
120 Map<String, Integer> identifierCount = Maps.newHashMap();
121
122 for (DocumentaryUnit doc : repository.getAllDocumentaryUnits()) {
123 for (DocumentaryUnitDescription description : doc.getDocumentDescriptions()) {
124 for (AccessPoint relationship : description.getAccessPoints()) {
125 if (accessPointTypes.isEmpty() || accessPointTypes
126 .contains(relationship.getRelationshipType())) {
127 String trimmedName = relationship.getName().trim();
128 String identifier = getIdentifier(relationship);
129 String prior = conceptIdentifierNames.get(identifier);
130 if (identifier.isEmpty() || trimmedName.isEmpty()) {
131 logger.warn("Ignoring empty access point name");
132 } else if (prior != null && !prior.equals(trimmedName)) {
133 logger.warn("Concept name/slug collision: '{}' -> '{}'", trimmedName,
134 prior);
135 } else {
136 conceptIdentifierNames.put(identifier, trimmedName);
137 identifierConcept.put(identifier, Optional.empty());
138 int count = identifierCount.containsKey(identifier)
139 ? identifierCount.get(identifier)
140 : 0;
141 identifierCount.put(identifier, count + 1);
142 }
143 }
144 }
145 }
146 }
147
148
149
150 if (!willCreateItems(identifierCount, excludeSingles)) {
151 return 0;
152 }
153
154
155 ActionManager actionManager = new ActionManager(graph);
156 ActionManager.EventContext conceptEvent = actionManager
157 .setScope(vocabulary)
158 .newEventContext(user, EventTypes.creation, logMessage);
159 Api api = ApiFactory.noLogging(graph, user.as(Accessor.class));
160
161 for (Map.Entry<String, String> idName : conceptIdentifierNames.entrySet()) {
162 String identifier = idName.getKey();
163 String name = idName.getValue();
164
165
166 if (identifierCount.get(identifier) < 2 && excludeSingles) {
167 continue;
168 }
169
170 Bundle conceptBundle = Bundle.Builder.withClass(EntityClass.CVOC_CONCEPT)
171 .addDataValue(Ontology.IDENTIFIER_KEY, identifier)
172 .addRelation(Ontology.DESCRIPTION_FOR_ENTITY, Bundle.Builder
173 .withClass(EntityClass.CVOC_CONCEPT_DESCRIPTION)
174 .addDataValue(Ontology.LANGUAGE_OF_DESCRIPTION, defaultLanguageCode)
175 .addDataValue(Ontology.NAME_KEY, name)
176 .build())
177 .build();
178
179 try {
180 Concept concept = api.create(conceptBundle, Concept.class);
181 concept.setVocabulary(vocabulary);
182 identifierConcept.put(identifier, Optional.of(concept));
183 conceptEvent.addSubjects(concept);
184 } catch (ValidationError validationError) {
185
186
187
188
189
190
191 logger.warn("Id/name collision error: '{}' -> '{}' ('{}')", identifier, name,
192 conceptIdentifierNames.get(identifier));
193 logger.error("Link integrity error: ", validationError);
194 if (!tolerant) {
195 throw validationError;
196 }
197 } catch (DeserializationError e) {
198 throw new RuntimeException(e);
199 }
200 }
201
202 conceptEvent.commit();
203
204
205
206 ActionManager.EventContext linkEvent = actionManager
207 .newEventContext(user, EventTypes.creation, logMessage);
208 int linkCount = 0;
209 for (DocumentaryUnit doc : repository.getAllDocumentaryUnits()) {
210 for (DocumentaryUnitDescription description : doc.getDocumentDescriptions()) {
211 for (AccessPoint relationship : description.getAccessPoints()) {
212 if (accessPointTypes.isEmpty() || accessPointTypes
213 .contains(relationship.getRelationshipType())) {
214
215 String identifier = getIdentifier(relationship);
216
217 if (identifierCount.get(identifier) < 2 && excludeSingles) {
218 continue;
219 }
220
221 Optional<Concept> conceptOpt = identifierConcept.get(identifier);
222 try {
223 if (conceptOpt != null && conceptOpt.isPresent()) {
224 Concept concept = conceptOpt.get();
225 Bundle linkBundle = Bundle.Builder.withClass(EntityClass.LINK)
226 .addDataValue(Ontology.LINK_HAS_TYPE, LINK_TYPE)
227 .build();
228 Link link = api.create(linkBundle, Link.class);
229 link.addLinkTarget(doc);
230 link.addLinkTarget(concept);
231 link.addLinkBody(relationship);
232 linkEvent.addSubjects(link);
233 linkCount++;
234 }
235 } catch (DeserializationError e) {
236 throw new RuntimeException(e);
237 }
238 }
239 }
240 }
241 }
242
243 linkEvent.commit();
244
245 return linkCount;
246 }
247
248
249
250
251
252
253
254
255 public Linker withExcludeSingles(boolean excludeSingles) {
256 return new Linker(graph, accessPointTypes, DEFAULT_LANG,
257 logMessage, tolerant, excludeSingles);
258 }
259
260
261
262
263
264
265
266
267
268 public Linker withTolerant(boolean tolerant) {
269 return new Linker(graph, accessPointTypes, DEFAULT_LANG,
270 logMessage, tolerant, excludeSingles);
271 }
272
273
274
275
276
277
278
279 public Linker withDefaultLanguage(String defaultLanguageCode) {
280 return new Linker(graph, accessPointTypes, checkNotNull(defaultLanguageCode),
281 logMessage, tolerant, excludeSingles);
282 }
283
284
285
286
287
288
289
290 Linker withLogMessage(String logMessage) {
291 return new Linker(graph, accessPointTypes, checkNotNull(defaultLanguageCode),
292 Optional.ofNullable(logMessage), tolerant, excludeSingles);
293 }
294
295
296
297
298
299
300
301 public Linker withLogMessage(Optional<String> logMessage) {
302 return new Linker(graph, accessPointTypes, checkNotNull(defaultLanguageCode),
303 checkNotNull(logMessage), tolerant, excludeSingles);
304 }
305
306
307
308
309
310
311
312
313
314 public Linker withAccessPointTypes(Set<AccessPointType> accessPointTypes) {
315 return new Linker(graph, Sets.newHashSet(checkNotNull(accessPointTypes)),
316 defaultLanguageCode, logMessage, tolerant, excludeSingles);
317 }
318
319
320
321
322
323
324
325
326 Linker withAccessPointType(AccessPointType accessPointType) {
327 Set<AccessPointType> tmp = Sets.newHashSet(checkNotNull(accessPointTypes));
328 tmp.add(accessPointType);
329 return new Linker(graph, tmp, defaultLanguageCode,
330 logMessage, tolerant, excludeSingles);
331 }
332
333
334
335 private static boolean willCreateItems(Map<String, Integer> identifierCounts, boolean excludeSingles) {
336 if (identifierCounts.isEmpty()) {
337 return false;
338 } else if (excludeSingles) {
339 Integer maxCount = 0;
340 for (Integer c : identifierCounts.values()) {
341 if (c != null && c > maxCount) {
342 maxCount = c;
343 }
344 }
345 if (maxCount < 2) {
346 return false;
347 }
348 }
349 return true;
350 }
351
352 private static String getIdentifier(AccessPoint relationship) {
353 return Slugify.slugify(relationship.getName().trim())
354 .replaceAll("^-+", "")
355 .replaceAll("-+$", "");
356 }
357 }