1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package eu.ehri.project.importers.ead;
21
22 import com.google.common.collect.ImmutableMap;
23 import com.google.common.collect.Lists;
24 import eu.ehri.project.definitions.Entities;
25 import eu.ehri.project.definitions.Ontology;
26 import eu.ehri.project.exceptions.ValidationError;
27 import eu.ehri.project.importers.base.ItemImporter;
28 import eu.ehri.project.importers.base.SaxXmlHandler;
29 import eu.ehri.project.importers.properties.XmlImportProperties;
30 import eu.ehri.project.importers.util.ImportHelpers;
31 import eu.ehri.project.models.DocumentaryUnit;
32 import eu.ehri.project.models.MaintenanceEvent;
33 import eu.ehri.project.models.VirtualUnit;
34 import eu.ehri.project.models.base.AbstractUnit;
35 import eu.ehri.project.models.base.Entity;
36 import org.slf4j.Logger;
37 import org.slf4j.LoggerFactory;
38 import org.xml.sax.Attributes;
39 import org.xml.sax.SAXException;
40
41 import java.util.ArrayList;
42 import java.util.List;
43 import java.util.Locale;
44 import java.util.Map;
45 import java.util.Stack;
46 import java.util.regex.Pattern;
47
48
49
50
51
52
53 public class VirtualEadHandler extends SaxXmlHandler {
54 private static final String AUTHOR = "authors",
55 SOURCEFILEID = "sourceFileId";
56
57 private final List<Map<String, Object>> globalMaintenanceEvents = Lists.newArrayList();
58
59 private final ImmutableMap<String, Class<? extends Entity>> possibleSubNodes = ImmutableMap.<String, Class<? extends Entity>>of(
60 Entities.MAINTENANCE_EVENT, MaintenanceEvent.class
61 );
62
63 private static final Logger logger = LoggerFactory
64 .getLogger(VirtualEadHandler.class);
65
66 protected final List<AbstractUnit>[] children = new ArrayList[12];
67 private final Stack<String> scopeIds = new Stack<>();
68
69 private final static Pattern childItemPattern = Pattern.compile("^/*c(?:\\d*)$");
70
71
72 private final static String ARCHDESC = "archdesc";
73 private final static String DID = "did";
74
75
76
77 private VirtualUnit topLevel;
78
79
80
81
82 protected String eadLanguage = Locale.ENGLISH.getISO3Language();
83
84
85
86
87 private String eadId;
88 private String author;
89
90
91
92
93 @Override
94 public org.xml.sax.InputSource resolveEntity(String publicId, String systemId)
95 throws org.xml.sax.SAXException, java.io.IOException {
96
97 return new org.xml.sax.InputSource(new java.io.StringReader(""));
98 }
99
100
101
102
103
104
105 @SuppressWarnings("unchecked")
106 public VirtualEadHandler(ItemImporter<Map<String, Object>, ?> importer) {
107 this(importer, new XmlImportProperties("vc.properties"));
108 logger.warn("vc.properties used");
109 }
110
111
112
113
114
115
116
117 public VirtualEadHandler(ItemImporter<Map<String, Object>, ?> importer,
118 XmlImportProperties xmlImportProperties) {
119 super(importer, xmlImportProperties);
120 children[depth] = Lists.newArrayList();
121 }
122
123 @Override
124 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
125 super.startElement(uri, localName, qName, attributes);
126
127 if (isUnitDelimiter(qName)) {
128 children[depth] = Lists.newArrayList();
129 }
130 }
131
132 protected List<String> pathIds() {
133 if (scopeIds.isEmpty()) {
134 return scopeIds;
135 } else {
136 List<String> path = Lists.newArrayList();
137 for (int i = 0; i < scopeIds.size() - 1; i++) {
138 path.add(scopeIds.get(i));
139 }
140 return path;
141 }
142
143 }
144
145 private String getCurrentTopIdentifier() {
146 Object current = currentGraphPath.peek().get(ImportHelpers.OBJECT_IDENTIFIER);
147 if (current instanceof List<?>) {
148 return (String) ((List) current).get(0);
149 } else {
150 return (String) current;
151 }
152 }
153
154
155
156
157
158
159
160
161
162
163 @Override
164 public void endElement(String uri, String localName, String qName) throws SAXException {
165
166 super.endElement(uri, localName, qName);
167
168
169
170 if (localName.equals("eadid") || qName.equals("eadid")) {
171 eadId = (String) currentGraphPath.peek().get(SOURCEFILEID);
172 logger.debug("Found <eadid>: " + eadId);
173 } else if (localName.equals("author") || qName.equals("author")) {
174 author = (String) currentGraphPath.peek().get(AUTHOR);
175 logger.debug("Found <author>: " + author);
176 }
177
178 if (localName.equals("language") || qName.equals("language")) {
179 String lang = (String) currentGraphPath.peek().get("languageCode");
180 if (lang != null)
181 eadLanguage = lang;
182 }
183
184
185
186
187
188 if (qName.equals(DID)) {
189 extractIdentifier(currentGraphPath.peek());
190 String topId = getCurrentTopIdentifier();
191 scopeIds.push(topId);
192 logger.debug("Current id path: " + scopeIds);
193 }
194
195 if (needToCreateSubNode(qName)) {
196 Map<String, Object> currentGraph = currentGraphPath.pop();
197
198 if (isUnitDelimiter(qName)) {
199 try {
200
201
202 extractIdentifier(currentGraph);
203
204
205 extractTitle(currentGraph);
206
207 useDefaultLanguage(currentGraph);
208
209 extractDate(currentGraph);
210
211 currentGraph.put(SOURCEFILEID, getSourceFileId());
212
213
214 addAuthor(currentGraph);
215
216 if (!globalMaintenanceEvents.isEmpty() && !currentGraph.containsKey(Entities.MAINTENANCE_EVENT)) {
217 logger.debug("Adding global maintenance events: {}", globalMaintenanceEvents);
218 currentGraph.put(Entities.MAINTENANCE_EVENT, globalMaintenanceEvents);
219 }
220
221 AbstractUnit current = (AbstractUnit) importer.importItem(currentGraph, pathIds());
222
223 if (current.getType().equals(Entities.VIRTUAL_UNIT)) {
224 logger.debug("virtual unit created: " + current.getIdentifier());
225 topLevel = (VirtualUnit) current;
226 logger.debug("importer used: " + importer.getClass());
227 if (depth > 0) {
228 children[depth - 1].add(current);
229
230
231
232
233 for (AbstractUnit child : children[depth]) {
234 if (child != null) {
235 if (child.getType().equals(Entities.VIRTUAL_UNIT)) {
236 logger.debug("virtual child");
237
238 ((VirtualUnit) current).addChild(((VirtualUnit) child));
239 child.setPermissionScope(current);
240 } else {
241 logger.debug("documentary child");
242 ((VirtualUnit) current).addIncludedUnit(((DocumentaryUnit) child));
243
244 }
245 }
246 }
247 }
248 } else {
249
250 logger.debug("documentary Unit found: " + current.getIdentifier());
251 if (depth > 0) {
252 children[depth - 1].add(current);
253 }
254 }
255 } catch (ValidationError ex) {
256 logger.error("caught validation error: " + ex.getMessage());
257 } finally {
258 depth--;
259 scopeIds.pop();
260 }
261 } else {
262
263 if (getMappedProperty(currentPath).equals(Entities.MAINTENANCE_EVENT)
264 && (qName.equals("profiledesc") || qName.equals("change"))) {
265 Map<String, Object> me = ImportHelpers.getSubNode(currentGraph);
266 me.put("order", globalMaintenanceEvents.size());
267 globalMaintenanceEvents.add(me);
268 }
269
270 putSubGraphInCurrentGraph(getMappedProperty(currentPath), currentGraph);
271 depth--;
272 }
273 }
274
275 currentPath.pop();
276 if (currentPath.isEmpty()) {
277 currentGraphPath.pop();
278 }
279 }
280
281 protected String getSourceFileId() {
282 if (getEadId().toLowerCase().endsWith("#" + getDefaultLanguage().toLowerCase())) {
283 return getEadId();
284 }
285 return getEadId() + "#" + getDefaultLanguage().toUpperCase();
286 }
287
288
289
290
291
292
293 protected String getEadId() {
294 if (eadId == null)
295 logger.error("eadid not set yet or empty");
296 return eadId;
297 }
298
299 protected String getAuthor() {
300 return author;
301 }
302
303
304
305
306
307
308
309 protected void useDefaultLanguage(Map<String, Object> currentGraph) {
310 useDefaultLanguage(currentGraph, getDefaultLanguage());
311 }
312
313
314
315
316
317
318
319
320 protected void useDefaultLanguage(Map<String, Object> currentGraph, String defaultLanguage) {
321
322 if (!currentGraph.containsKey(Ontology.LANGUAGE_OF_DESCRIPTION)) {
323 logger.debug("Using default language code: " + defaultLanguage);
324 currentGraph.put(Ontology.LANGUAGE_OF_DESCRIPTION, defaultLanguage);
325 }
326 }
327
328 protected String getDefaultLanguage() {
329 return eadLanguage;
330 }
331
332
333
334
335
336
337
338 protected void extractTitle(Map<String, Object> currentGraph) {
339
340 }
341
342
343
344
345
346
347
348 protected void extractDate(Map<String, Object> currentGraph) {
349
350 }
351
352
353
354
355
356
357
358 protected void extractIdentifier(Map<String, Object> currentGraph) {
359
360 }
361
362
363
364
365
366
367
368
369
370 protected void addOtherIdentifier(Map<String, Object> currentGraph, String otherIdentifier) {
371 if (currentGraph.containsKey(Ontology.OTHER_IDENTIFIERS)) {
372 logger.debug("adding alternative id: " + otherIdentifier);
373 Object oids = currentGraph.get(Ontology.OTHER_IDENTIFIERS);
374 if (oids != null && oids instanceof ArrayList<?>) {
375 ((ArrayList<String>) oids).add(otherIdentifier);
376 logger.debug("alternative ID added");
377 }
378 } else {
379 logger.debug("adding first alt id: " + otherIdentifier);
380 List<String> oids = Lists.newArrayList();
381 oids.add(otherIdentifier);
382 currentGraph.put(Ontology.OTHER_IDENTIFIERS, oids);
383 }
384 }
385
386 @Override
387 protected boolean needToCreateSubNode(String qName) {
388
389 boolean need = isUnitDelimiter(qName);
390
391 String path = getMappedProperty(currentPath);
392 if (path != null) {
393 need = need || path.endsWith("AccessPoint");
394 }
395 return need || possibleSubNodes.containsKey(getMappedProperty(currentPath));
396 }
397
398
399
400
401
402
403
404 protected static boolean isUnitDelimiter(String elementName) {
405 return childItemPattern.matcher(elementName).matches() || elementName.equals(ARCHDESC);
406 }
407
408 private void addAuthor(Map<String, Object> currentGraph) {
409 if (getAuthor() != null && !currentGraph.containsKey(AUTHOR)) {
410 currentGraph.put(AUTHOR, getAuthor());
411 }
412 }
413 }