1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package eu.ehri.project.importers.ead;
21
22 import com.google.common.base.Joiner;
23 import com.google.common.collect.ImmutableList;
24 import com.google.common.collect.ImmutableMap;
25 import com.google.common.collect.Lists;
26 import eu.ehri.project.definitions.Entities;
27 import eu.ehri.project.definitions.Ontology;
28 import eu.ehri.project.exceptions.ValidationError;
29 import eu.ehri.project.importers.base.ItemImporter;
30 import eu.ehri.project.importers.base.SaxXmlHandler;
31 import eu.ehri.project.importers.properties.XmlImportProperties;
32 import eu.ehri.project.importers.util.ImportHelpers;
33 import eu.ehri.project.models.DocumentaryUnit;
34 import eu.ehri.project.models.MaintenanceEvent;
35 import eu.ehri.project.models.MaintenanceEventType;
36 import eu.ehri.project.models.base.Entity;
37 import eu.ehri.project.persistence.Bundle;
38 import org.slf4j.Logger;
39 import org.slf4j.LoggerFactory;
40 import org.xml.sax.Attributes;
41 import org.xml.sax.SAXException;
42
43 import java.util.ArrayList;
44 import java.util.List;
45 import java.util.Locale;
46 import java.util.Map;
47 import java.util.Stack;
48 import java.util.regex.Pattern;
49
50
51
52
53
54
55
56
57 public class EadHandler extends SaxXmlHandler {
58
59
/** Element names that receive dedicated handling in this handler. */
static final String EADID = "eadid",
        ARCHDESC = "archdesc",
        DID = "did";

/**
 * Keys copied from the enclosing graph onto the graph of the
 * {@code archdesc} unit when that unit is completed.
 */
private static final List<String> eadFileGlobals = ImmutableList.of(
        "rulesAndConventions", "processInfo"
);

/** Properties file used when the caller does not supply one. */
private static final String DEFAULT_PROPERTIES = "ead2002.properties";

/**
 * Maintenance events collected from {@code profiledesc} and {@code change}
 * elements; attached to every unit that has no maintenance events of its own.
 */
private final List<Map<String, Object>> globalMaintenanceEvents = Lists.newArrayList();

/** Mapped property names that always cause a sub-node to be created. */
private final Map<String, Class<? extends Entity>> possibleSubNodes = ImmutableMap.of(
        Entities.MAINTENANCE_EVENT, MaintenanceEvent.class
);

private static final Logger logger = LoggerFactory.getLogger(EadHandler.class);

/**
 * Imported child units, indexed by depth. The array has a fixed size of 12
 * slots; a slot is (re)initialised when a unit element starts.
 */
@SuppressWarnings("unchecked")
protected final List<DocumentaryUnit>[] children = new ArrayList[12];

/**
 * Identifiers of the units currently open: an entry is pushed when a
 * {@code did} element ends and popped when the owning unit is completed.
 */
private final Stack<String> scopeIds = new Stack<>();

/**
 * Matches element names that denote a child component: any number of leading
 * slashes, the letter {@code c}, then optional digits.
 */
private final static Pattern childItemPattern = Pattern.compile("^/*c(?:\\d*)$");

/**
 * Language used for descriptions lacking an explicit one. Starts as the
 * three-letter ISO code for English and is replaced when a {@code language}
 * element with a {@code languageCode} value has been read.
 */
private String eadLanguage = Locale.ENGLISH.getISO3Language();

/** Value captured when the {@code eadid} element ends; {@code null} until then. */
private String eadId;
99
100
101
102 @Override
103 public org.xml.sax.InputSource resolveEntity(String publicId, String systemId)
104 throws org.xml.sax.SAXException, java.io.IOException {
105
106 return new org.xml.sax.InputSource(new java.io.StringReader(""));
107 }
108
109
110
111
112
113
/**
 * Creates a handler that uses the default properties file
 * ({@code ead2002.properties}) and logs a warning that the default is in use.
 *
 * @param importer the importer that receives completed items
 */
public EadHandler(ItemImporter<Map<String, Object>, ?> importer) {
    this(importer, new XmlImportProperties(DEFAULT_PROPERTIES));
    logger.warn("Using default properties file: {}", DEFAULT_PROPERTIES);
}
118
119
120
121
122
123
124
/**
 * Creates a handler with an explicit set of import properties and
 * prepares an empty child list for the current depth.
 *
 * @param importer   the importer that receives completed items
 * @param properties the XML-to-property mapping passed to the superclass
 */
public EadHandler(ItemImporter<Map<String, Object>, ?> importer,
        XmlImportProperties properties) {
    super(importer, properties);
    children[depth] = new ArrayList<>();
}
130
131 @Override
132 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
133 super.startElement(uri, localName, qName, attributes);
134
135 if (isUnitDelimiter(qName)) {
136 children[depth] = Lists.newArrayList();
137 }
138 if (qName.equals("profiledesc")) {
139 putPropertyInCurrentGraph(Ontology.MAINTENANCE_EVENT_TYPE, MaintenanceEventType.created.toString());
140 }
141 if (qName.equals("change")) {
142 putPropertyInCurrentGraph(Ontology.MAINTENANCE_EVENT_TYPE, MaintenanceEventType.updated.toString());
143 }
144 }
145
146
147
148
149
150
151 protected List<String> pathIds() {
152 if (scopeIds.isEmpty()) {
153 return scopeIds;
154 } else {
155 List<String> path = Lists.newArrayList();
156 for (int i = 0; i < scopeIds.size() - 1; i++) {
157 path.add(scopeIds.get(i));
158 }
159 return path;
160 }
161
162 }
163
164 private String getCurrentTopIdentifier() {
165 Object current = currentGraphPath.peek().get(ImportHelpers.OBJECT_IDENTIFIER);
166 if (current instanceof List<?>) {
167 return (String) ((List) current).get(0);
168 } else {
169 return (String) current;
170 }
171 }
172
173
174
175
176
177
178
179
180
181
/**
 * Handles the end of an element. After delegating to the superclass it:
 * <ul>
 * <li>captures the EAD id when {@code eadid} ends;</li>
 * <li>updates the default language when a {@code language} element ends and a
 * {@code languageCode} value is present;</li>
 * <li>pushes the unit identifier on the scope stack when {@code did} ends;</li>
 * <li>when the element closes a sub-node: imports it if it is a unit
 * ({@code archdesc} or a component), otherwise attaches it to its parent
 * graph, recording {@code profiledesc}/{@code change} maintenance events
 * globally;</li>
 * <li>pops the element from the current path.</li>
 * </ul>
 * Validation errors raised while importing a unit are passed to the
 * importer's error handler rather than propagated.
 */
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {

    super.endElement(uri, localName, qName);

    // Remember the file-level id; it is later used to build the source file id.
    if (qName.equals(EADID)) {
        eadId = ((String) currentGraphPath.peek().get(Ontology.SOURCEFILE_KEY));
        logger.debug("Found <{}>: {}", EADID, eadId);
    }

    // A language code found in the document replaces the default language.
    if (localName.equals("language") || qName.equals("language")) {
        String lang = (String) currentGraphPath.peek().get("languageCode");
        if (lang != null)
            eadLanguage = lang;
    }

    // At the end of <did> the identifier is normalised and pushed on the
    // scope stack; the matching pop happens when the owning unit completes.
    if (qName.equals(DID)) {
        extractIdentifier(currentGraphPath.peek());
        String topId = getCurrentTopIdentifier();
        scopeIds.push(topId);
        logger.debug("Current id path: {}", scopeIds);
    }

    if (needToCreateSubNode(qName)) {
        Map<String, Object> currentGraph = currentGraphPath.pop();

        if (isUnitDelimiter(qName)) {
            try {
                // Fill in identifier, title, language and date before importing.
                extractIdentifier(currentGraph);

                extractTitle(currentGraph);

                useDefaultLanguage(currentGraph);

                extractDate(currentGraph);

                currentGraph.put(Ontology.SOURCEFILE_KEY, getSourceFileId());

                // File-level values are only copied onto the archdesc unit.
                if (qName.equals(ARCHDESC)) {

                    addGlobalValues(currentGraph, currentGraphPath.peek(), eadFileGlobals);
                }

                // Units without their own maintenance events get the global ones.
                if (!globalMaintenanceEvents.isEmpty() && !currentGraph.containsKey(Entities.MAINTENANCE_EVENT)) {
                    logger.debug("Adding global maintenance events: {}", globalMaintenanceEvents);
                    currentGraph.put(Entities.MAINTENANCE_EVENT, globalMaintenanceEvents);
                }

                DocumentaryUnit current = (DocumentaryUnit) importer.importItem(currentGraph, pathIds());

                logger.debug("importer used: {}", importer.getClass());
                if (depth > 0) { // if not on root level
                    // Register this unit with the level above ...
                    children[depth - 1].add(current);

                    // ... and link the units collected at this depth to it.
                    for (DocumentaryUnit child : children[depth]) {
                        if (child != null) {
                            current.addChild(child);
                            child.setPermissionScope(current);
                        }
                    }
                }
            } catch (ValidationError ex) {
                Bundle bundle = ex.getBundle();
                if (bundle.getId() == null) {
                    // The failed bundle has no id, so synthesise one from the
                    // ancestor path, the system id and the current line number
                    // to make the error locatable.
                    String path = pathIds().isEmpty() ? null : Joiner.on("/").join(pathIds());
                    String ref = String.format("[Item completed prior to line: %d]",
                            locator.getLineNumber());
                    String id = Joiner.on(" ").skipNulls().join(path, locator.getSystemId(), ref);
                    importer.handleError(new ValidationError(bundle.withId(id), ex.getErrorSet()));
                } else {
                    importer.handleError(ex);
                }
            } finally {
                // Leave the unit's level whether or not the import succeeded.
                depth--;
                scopeIds.pop();
            }
        } else {
            // Non-unit sub-node: maintenance events from profiledesc/change are
            // additionally kept in the global list, numbered in order of arrival.
            if (getMappedProperty(currentPath).equals(Entities.MAINTENANCE_EVENT)
                    && (qName.equals("profiledesc") || qName.equals("change"))) {
                Map<String, Object> me = ImportHelpers.getSubNode(currentGraph);
                me.put("order", globalMaintenanceEvents.size());
                globalMaintenanceEvents.add(me);
            }
            putSubGraphInCurrentGraph(getMappedProperty(currentPath), currentGraph);
            depth--;
        }
    }

    currentPath.pop();
    // Once the outermost element has ended, drop the remaining graph as well.
    if (currentPath.isEmpty()) {
        currentGraphPath.pop();
    }

}
293
294
295
296
297 protected String getSourceFileId() {
298 if (eadId == null) {
299 logger.error("EADID not set yet, or not given in eadfile");
300 return null;
301 } else {
302 String suffix = "#" + eadLanguage.toUpperCase();
303 if (eadId.toUpperCase().endsWith(suffix)) {
304 return eadId;
305 }
306 return eadId + suffix;
307 }
308 }
309
310
311
312
313
314
315
/**
 * Sets the language of description on the given graph to the handler's
 * current default language, unless the graph already has one.
 *
 * @param currentGraph the graph to complete
 */
protected void useDefaultLanguage(Map<String, Object> currentGraph) {
    useDefaultLanguage(currentGraph, eadLanguage);
}
319
320
321
322
323
324
325
326
327 private void useDefaultLanguage(Map<String, Object> currentGraph, String defaultLanguage) {
328 if (!currentGraph.containsKey(Ontology.LANGUAGE_OF_DESCRIPTION)) {
329 logger.debug("Using default language code: {}", defaultLanguage);
330 currentGraph.put(Ontology.LANGUAGE_OF_DESCRIPTION, defaultLanguage);
331 }
332 }
333
334
335
336
337
338
339 protected void extractTitle(Map<String, Object> currentGraph) {
340 if (!currentGraph.containsKey(Ontology.NAME_KEY)) {
341 logger.error("no name found, using identifier {}", currentGraph.get(ImportHelpers.OBJECT_IDENTIFIER));
342 currentGraph.put(Ontology.NAME_KEY, currentGraph.get(ImportHelpers.OBJECT_IDENTIFIER));
343 }
344 }
345
346
347
348
349
350
351
/**
 * Hook invoked for each unit before it is imported. The body is currently
 * empty, so the graph is left unchanged.
 *
 * @param currentGraph the graph of the unit being completed (unused)
 */
@SuppressWarnings("unused")
private void extractDate(Map<String, Object> currentGraph) {
}
355
356
357
358
359
360
361
362 protected void extractIdentifier(Map<String, Object> currentGraph) {
363
364
365 if (currentGraph.containsKey(ImportHelpers.OBJECT_IDENTIFIER)) {
366 Object idents = currentGraph.get(ImportHelpers.OBJECT_IDENTIFIER);
367 if (idents instanceof List) {
368 List identList = (List) idents;
369 currentGraph.put(ImportHelpers.OBJECT_IDENTIFIER, identList.get(0));
370 for (Object item : identList.subList(1, identList.size())) {
371 addOtherIdentifier(currentGraph, ((String) item));
372 }
373 }
374 }
375 }
376
377
378
379
380
381
382
383
384
385 protected void addOtherIdentifier(Map<String, Object> currentGraph, String otherIdentifier) {
386 if (currentGraph.containsKey(Ontology.OTHER_IDENTIFIERS)) {
387 logger.debug("adding alternative id: {}", otherIdentifier);
388 Object oids = currentGraph.get(Ontology.OTHER_IDENTIFIERS);
389 if (oids instanceof List) {
390 ((List<String>) oids).add(otherIdentifier);
391 } else {
392 currentGraph.put(Ontology.OTHER_IDENTIFIERS,
393 Lists.newArrayList(oids, otherIdentifier));
394 }
395 } else {
396 logger.debug("adding first alt id: {}", otherIdentifier);
397 currentGraph.put(Ontology.OTHER_IDENTIFIERS, Lists.newArrayList(otherIdentifier));
398 }
399 }
400
401 @Override
402 protected boolean needToCreateSubNode(String qName) {
403
404 boolean need = isUnitDelimiter(qName);
405
406 String path = getMappedProperty(currentPath);
407 if (path != null) {
408 need = need || path.endsWith("AccessPoint");
409 }
410 return need || possibleSubNodes.containsKey(getMappedProperty(currentPath));
411 }
412
413
414
415
416
417
418
419 private static boolean isUnitDelimiter(String elementName) {
420 return childItemPattern.matcher(elementName).matches() || elementName.equals(ARCHDESC);
421 }
422
423 private void addGlobalValues(Map<String, Object> currentGraph, Map<String, Object> globalGraph, List<String> eadFileGlobals) {
424 for (String key : eadFileGlobals) {
425 ImportHelpers.putPropertyInGraph(currentGraph, key, ((String) globalGraph.get(key)));
426 }
427 }
428 }