1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package eu.ehri.project.importers.base;
21
22 import com.google.common.collect.Lists;
23 import com.google.common.collect.Maps;
24 import eu.ehri.project.importers.properties.XmlImportProperties;
25 import eu.ehri.project.importers.util.ImportHelpers;
26 import org.slf4j.Logger;
27 import org.slf4j.LoggerFactory;
28 import org.xml.sax.Attributes;
29 import org.xml.sax.ContentHandler;
30 import org.xml.sax.Locator;
31 import org.xml.sax.SAXException;
32 import org.xml.sax.ext.LexicalHandler;
33 import org.xml.sax.helpers.DefaultHandler;
34
35 import java.util.List;
36 import java.util.Map;
37 import java.util.Optional;
38 import java.util.Stack;
39
40 import static eu.ehri.project.definitions.Ontology.LANGUAGE_OF_DESCRIPTION;
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59 public abstract class SaxXmlHandler extends DefaultHandler implements LexicalHandler, ContentHandler {
60
61 private static final Logger logger = LoggerFactory.getLogger(SaxXmlHandler.class);
62
63 protected Locator locator;
64
65
66
67
68 protected final Stack<Map<String, Object>> currentGraphPath = new Stack<>();
69 protected final Map<String, Map<String, Object>> languageMap = Maps.newHashMap();
70 protected final Stack<String> currentPath = new Stack<>();
71 protected final Stack<StringBuilder> currentText = new Stack<>();
72
73 protected String currentEntity;
74
75 protected final ItemImporter<Map<String, Object>, ?> importer;
76 protected final XmlImportProperties properties;
77
78 protected int depth;
79 private String attribute;
80 private String languagePrefix;
81
82 public SaxXmlHandler(ItemImporter<Map<String, Object>, ?> importer) {
83 this(importer, null);
84 }
85
86 public SaxXmlHandler(ItemImporter<Map<String, Object>, ?> importer, XmlImportProperties properties) {
87 super();
88 this.importer = importer;
89 this.properties = properties;
90 currentGraphPath.push(Maps.<String, Object>newHashMap());
91 }
92
93
94
95
96
97
98
99
100 protected abstract boolean needToCreateSubNode(String qName);
101
102 @Override
103 public void startEntity(String name) {
104 currentEntity = name;
105 }
106
107 @Override
108 public void endEntity(String name) {
109 currentEntity = null;
110 }
111
112 @Override
113 public void startDTD(String name, String publicId, String systemId) {
114 }
115
116 @Override
117 public void setDocumentLocator(Locator locator) {
118 this.locator = locator;
119 }
120
121 @Override
122 public void endDTD() {
123 }
124
125 @Override
126 public void comment(char[] ch, int start, int end) {
127 }
128
129 @Override
130 public void startCDATA() {
131 }
132
133 @Override
134 public void endCDATA() {
135 }
136
137
138
139
140
141
142
143
144 @Override
145 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
146
147 currentText.push(new StringBuilder());
148
149
150
151 Optional<String> lang = languageAttribute(attributes);
152 if (lang.isPresent()) {
153 languagePrefix = lang.get();
154 if (!languageMap.containsKey(languagePrefix)) {
155 if (languageMap.isEmpty()) {
156 currentGraphPath.peek().put(LANGUAGE_OF_DESCRIPTION, languageMap);
157 }
158 Map<String, Object> m = Maps.newHashMap();
159 m.put(LANGUAGE_OF_DESCRIPTION, languagePrefix);
160 languageMap.put(languagePrefix, m);
161 }
162 }
163
164
165 currentPath.push(withoutNamespace(qName));
166 if (needToCreateSubNode(qName)) {
167 depth++;
168 logger.debug("Pushing depth... {} -> {}", depth, qName);
169 currentGraphPath.push(Maps.<String, Object>newHashMap());
170 }
171
172
173 for (int attr = 0; attr < attributes.getLength(); attr++) {
174 String attributeName = withoutNamespace(attributes.getQName(attr));
175 if (properties.hasAttributeProperty(attributeName)
176 && !properties.getAttributeProperty(attributeName).equals(LANGUAGE_OF_DESCRIPTION)) {
177
178 if (isKeyInPropertyFile(currentPath, "@" + properties.getAttributeProperty(attributeName), "")) {
179 String path = getMappedProperty(currentPath, "@" + properties.getAttributeProperty(attributeName), "");
180 putPropertyInCurrentGraph(path, attributes.getValue(attr));
181 } else if (isKeyInPropertyFile(currentPath, "@" + properties.getAttributeProperty(attributeName), "$" + attributes.getValue(attr))) {
182 attribute = getMappedProperty(currentPath, "@" + properties.getAttributeProperty(attributeName), "$" + attributes.getValue(attr));
183 } else {
184 logger.debug("attribute {} not found in properties", attributeName);
185 }
186 }
187 }
188 }
189
190
191
192
193
194 @Override
195 public void endElement(String uri, String localName, String qName) throws SAXException {
196 if (languagePrefix == null) {
197 if (attribute == null) {
198 putPropertyInCurrentGraph(getMappedProperty(currentPath), currentText.pop().toString());
199 } else {
200 putPropertyInCurrentGraph(attribute, currentText.pop().toString());
201 attribute = null;
202 }
203 } else {
204 ImportHelpers.putPropertyInGraph(languageMap.get(languagePrefix), getMappedProperty(currentPath), currentText.pop().toString());
205 }
206 }
207
208
209
210
211
212
213
214
215
216
217
218 @SuppressWarnings("unchecked")
219 protected void putSubGraphInCurrentGraph(String key, Map<String, Object> subgraph) {
220 Map<String, Object> c = currentGraphPath.peek();
221 if (c.containsKey(key)) {
222 ((List<Map<String, Object>>) c.get(key)).add(subgraph);
223 } else {
224 c.put(key, Lists.newArrayList(subgraph));
225 }
226 }
227
228
229
230
231
232
233
234
235 private Optional<String> languageAttribute(Attributes attributes) {
236 for (int attr = 0; attr < attributes.getLength(); attr++) {
237 String isLangAttribute = withoutNamespace(attributes.getQName(attr));
238 String prop = properties.getAttributeProperty(isLangAttribute);
239 if (LANGUAGE_OF_DESCRIPTION.equals(prop)) {
240 logger.debug("Language detected!");
241 return Optional.of(attributes.getValue(attr));
242 }
243 }
244 return Optional.empty();
245 }
246
247
248
249
250
251
252
253 private String withoutNamespace(String qName) {
254 return qName.substring(qName.indexOf(":") + 1);
255 }
256
257
258
259
260
261
262 @Override
263 public void characters(char ch[], int start, int length) throws SAXException {
264
265
266
267
268
269 currentText.peek().append(ch, start, length);
270 }
271
272
273
274
275
276
277
278
279 protected void putPropertyInCurrentGraph(String property, String value) {
280 ImportHelpers.putPropertyInGraph(currentGraphPath.peek(), property, value);
281 }
282
283
284
285
286
287
288
289 protected void overwritePropertyInCurrentGraph(String property, String value) {
290 ImportHelpers.overwritePropertyInGraph(currentGraphPath.peek(), property, value);
291 }
292
293
294
295
296
297
298
299
300
301
302
303 protected String getMappedProperty(Stack<String> path) {
304 return getMappedProperty(path, "", "");
305 }
306
307
308
309
310
311
312
313
314
315
316
317
318
319 private String getMappedProperty(Stack<String> path, String attribute, String value) {
320 StringBuilder all = new StringBuilder();
321 for (int i = path.size(); i > 0; i--) {
322 all.insert(0, path.get(i - 1) + "/");
323 String key = properties.getProperty(all + attribute + escapeValueForKey(value));
324 if (key != null) {
325 return key;
326 }
327 }
328 return ImportHelpers.UNKNOWN_PREFIX + all.toString().replace("/", "_");
329 }
330
331
332
333
334
335
336
337
338
339
340
341 private boolean isKeyInPropertyFile(Stack<String> path, String attribute, String value) {
342 logger.trace("Checking for key in property file: {}, {}, {}", path, attribute, value);
343 String all = "";
344 for (int i = path.size(); i > 0; i--) {
345 all = path.get(i - 1) + "/" + all;
346 String key = all + attribute + escapeValueForKey(value);
347 if (properties.getProperty(key) != null) {
348 logger.trace(" FOUND Path key: {}", key);
349 return true;
350 }
351 }
352 return false;
353 }
354
355 private String escapeValueForKey(String value) {
356 return value.replaceAll("[\\s=:]", "_");
357 }
358
359
360
361
362 protected void printGraph() {
363 for (String key : currentGraphPath.peek().keySet()) {
364 System.out.println(key + ":" + currentGraphPath.peek().get(key));
365 }
366 }
367 }