View Javadoc

1   /*
2    * Copyright 2015 Data Archiving and Networked Services (an institute of
3    * Koninklijke Nederlandse Akademie van Wetenschappen), King's College London,
4    * Georg-August-Universitaet Goettingen Stiftung Oeffentlichen Rechts
5    *
6    * Licensed under the EUPL, Version 1.1 or – as soon they will be approved by
7    * the European Commission - subsequent versions of the EUPL (the "Licence");
8    * You may not use this work except in compliance with the Licence.
9    * You may obtain a copy of the Licence at:
10   *
11   * https://joinup.ec.europa.eu/software/page/eupl
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the Licence is distributed on an "AS IS" basis,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the Licence for the specific language governing
17   * permissions and limitations under the Licence.
18   */
19  
20  package eu.ehri.project.importers.util;
21  
22  import com.google.common.base.Charsets;
23  import com.google.common.base.Joiner;
24  import com.google.common.collect.Lists;
25  import com.google.common.collect.Maps;
26  import eu.ehri.project.definitions.Entities;
27  import eu.ehri.project.definitions.Ontology;
28  import eu.ehri.project.exceptions.ValidationError;
29  import eu.ehri.project.importers.properties.NodeProperties;
30  import eu.ehri.project.models.EntityClass;
31  import eu.ehri.project.models.MaintenanceEventType;
32  import eu.ehri.project.models.base.Description;
33  import eu.ehri.project.utils.LanguageHelpers;
34  import org.apache.commons.lang3.StringUtils;
35  import org.slf4j.Logger;
36  import org.slf4j.LoggerFactory;
37  
38  import java.io.BufferedReader;
39  import java.io.IOException;
40  import java.io.InputStream;
41  import java.io.InputStreamReader;
42  import java.util.List;
43  import java.util.Map;
44  
45  /**
46   * Import utility class.
47   */
48  public class ImportHelpers {
49  
50      public static final String LINK_TARGET = "target";
51      public static final String OBJECT_IDENTIFIER = "objectIdentifier";
52  
53      // Keys in the node that denote unknown properties must start with the value of UNKNOWN.
54      public static final String UNKNOWN_PREFIX = "UNKNOWN_";
55      private static final String NODE_PROPERTIES = "allowedNodeProperties.csv";
56  
57      /**
58       * Keys in the graph that encode a language code must start with the LANGUAGE_KEY_PREFIX.
59       */
60      private static final String LANGUAGE_KEY_PREFIX = "language";
61  
62      private static final Logger logger = LoggerFactory.getLogger(ImportHelpers.class);
63      private static final Joiner stringJoiner = Joiner.on("\n\n").skipNulls();
64      private static final NodeProperties nodeProperties = loadNodeProperties();
65  
66      /**
67       * Extract properties from the itemData Map that are marked as unknown, and return them in a new Map.
68       *
69       * @param itemData a Map containing raw properties of a unit
70       * @return returns a Map with all keys from itemData that start with SaxXmlHandler.UNKNOWN
71       * @throws ValidationError never
72       */
73      public static Map<String, Object> extractUnknownProperties(Map<String, Object> itemData) throws ValidationError {
74          Map<String, Object> unknowns = Maps.newHashMap();
75          for (Map.Entry<String, Object> key : itemData.entrySet()) {
76              if (key.getKey().startsWith(UNKNOWN_PREFIX)) {
77                  unknowns.put(key.getKey().substring(UNKNOWN_PREFIX.length()), key.getValue());
78              }
79          }
80          return unknowns;
81      }
82  
83      /**
84       * only properties that have the multivalued-status can actually be multivalued. all other properties will be
85       * flattened by this method.
86       *
87       * @param key    a property key
88       * @param value  a property value
89       * @param entity the EntityClass with which this frameMap must comply
90       */
91      public static Object flattenNonMultivaluedProperties(String key, Object value, EntityClass entity) {
92          if (value instanceof List
93                  && !(nodeProperties.hasProperty(entity.getName(), key)
94                  && nodeProperties.isMultivaluedProperty(entity.getName(), key))) {
95              logger.trace("Flattening array property value: {}: {}", key, value);
96              return stringJoiner.join((List) value);
97          } else {
98              return value;
99          }
100     }
101 
102     /**
103      * Extract DocumentaryUnit properties from the itemData and return them as a new Map.
104      * This implementation only extracts the objectIdentifier.
105      * <p>
106      * This implementation does not throw ValidationErrors.
107      *
108      * @param itemData a Map containing raw properties of a DocumentaryUnit
109      * @return a new Map containing the objectIdentifier property
110      * @throws ValidationError never
111      */
112     public static Map<String, Object> extractIdentifiers(Map<String, Object> itemData) throws ValidationError {
113         Map<String, Object> unit = Maps.newHashMap();
114         unit.put(Ontology.IDENTIFIER_KEY, itemData.get(OBJECT_IDENTIFIER));
115         if (itemData.get(Ontology.OTHER_IDENTIFIERS) != null) {
116             logger.debug("otherIdentifiers is not null");
117             unit.put(Ontology.OTHER_IDENTIFIERS, itemData.get(Ontology.OTHER_IDENTIFIERS));
118         }
119         return unit;
120     }
121 
122     /**
123      * Extract a Map containing the properties of a generic description.
124      * Excludes unknown properties, object identifier(s), maintenance events,
125      * relations, addresses and access point relations.
126      *
127      * @param itemData a Map containing raw properties of the description
128      * @param entity   an EntityClass
129      * @return a Map representation of a generic Description
130      */
131     public static Map<String, Object> extractDescription(Map<String, Object> itemData, EntityClass entity) {
132         Map<String, Object> description = Maps.newHashMap();
133 
134         description.put(Ontology.CREATION_PROCESS, Description.CreationProcess.IMPORT.toString());
135 
136         for (Map.Entry<String, Object> itemProperty : itemData.entrySet()) {
137             if (itemProperty.getKey().equals("descriptionIdentifier")) {
138                 description.put(Ontology.IDENTIFIER_KEY, itemProperty.getValue());
139             } else if (!itemProperty.getKey().startsWith(UNKNOWN_PREFIX)
140                     && !itemProperty.getKey().equals(OBJECT_IDENTIFIER)
141                     && !itemProperty.getKey().equals(Ontology.IDENTIFIER_KEY)
142                     && !itemProperty.getKey().equals(Ontology.OTHER_IDENTIFIERS)
143                     && !itemProperty.getKey().startsWith(Entities.MAINTENANCE_EVENT)
144                     && !itemProperty.getKey().startsWith(Entities.ACCESS_POINT)
145                     && !itemProperty.getKey().startsWith("IGNORE")
146                     && !itemProperty.getKey().startsWith("address/")
147                     && !itemProperty.getKey().endsWith("AccessPoint")) {
148                 description.put(itemProperty.getKey(), flattenNonMultivaluedProperties(
149                         itemProperty.getKey(), itemProperty.getValue(), entity));
150             }
151         }
152 
153         return description;
154     }
155 
156     /**
157      * Extract an address node representation from the itemData.
158      *
159      * @param itemData a Map containing raw properties of a unit
160      * @return returns a Map with all address/ keys (may be empty)
161      */
162     public static Map<String, Object> extractAddress(Map<String, Object> itemData) {
163         Map<String, Object> address = Maps.newHashMap();
164         for (Map.Entry<String, Object> itemProperty : itemData.entrySet()) {
165             if (itemProperty.getKey().startsWith("address/")) {
166                 address.put(itemProperty.getKey().substring(8), itemProperty.getValue());
167             }
168         }
169         return address;
170     }
171 
172     /**
173      * Extract a list of entity bundles for DatePeriods from the data,
174      * attempting to parse the unitdate attribute.
175      *
176      * @param data the data map. This is an out parameter from which
177      *             keys associated with extracted dates will be removed
178      */
179     public static List<Map<String, Object>> extractDates(Map<String, Object> data) {
180         return DateParser.extractDates(data);
181     }
182 
183     /**
184      * Extract the data from a sub-node.
185      *
186      * @param event a Map of event properties
187      * @return a data map
188      */
189     public static Map<String, Object> getSubNode(Map<String, Object> event) {
190         Map<String, Object> me = Maps.newHashMap();
191         for (Map.Entry<String, Object> eventEntry : event.entrySet()) {
192             // Hack for EAG 1 and 2012 compatibility - maps maintenance event
193             // types from old to new values
194             if (eventEntry.getKey().equals(Ontology.MAINTENANCE_EVENT_TYPE)) {
195                 me.put(Ontology.MAINTENANCE_EVENT_TYPE, MaintenanceEventType
196                         .withName((String) eventEntry.getValue()).toString());
197             } else {
198                 me.put(eventEntry.getKey(), eventEntry.getValue());
199             }
200         }
201         if (!me.containsKey(Ontology.MAINTENANCE_EVENT_TYPE)) {
202             me.put(Ontology.MAINTENANCE_EVENT_TYPE, MaintenanceEventType.updated.name());
203         }
204         return me;
205     }
206 
207     public static void overwritePropertyInGraph(Map<String, Object> c, String property, String value) {
208         String normValue = normaliseValue(property, value);
209         if (normValue != null && !normValue.isEmpty()) {
210             logger.debug("overwrite property: {} {}", property, normValue);
211             c.put(property, normValue);
212         }
213     }
214 
215     /**
216      * Stores this property value pair in the given graph node representation.
217      * If the value is effectively empty, nothing happens.
218      * If the property already exists, it is added to the value list.
219      *
220      * @param c        a Map representation of a graph node
221      * @param property the key to store the value for
222      * @param value    the value to store
223      */
224     public static void putPropertyInGraph(Map<String, Object> c, String property, String value) {
225         String normValue = normaliseValue(property, value);
226         if (normValue == null || normValue.isEmpty()) {
227             return;
228         }
229         logger.debug("putProp: {} -> {}", property, normValue);
230         if (c.containsKey(property)) {
231             Object currentValue = c.get(property);
232             if (currentValue instanceof List) {
233                 ((List) currentValue).add(normValue);
234             } else {
235                 c.put(property, Lists.newArrayList(currentValue, normValue));
236             }
237         } else {
238             c.put(property, normValue);
239         }
240     }
241 
242     private static String normaliseValue(String property, String value) {
243         String trimmedValue = StringUtils.normalizeSpace(value);
244         // Language codes are converted to their 3-letter alternates
245         return property.startsWith(LANGUAGE_KEY_PREFIX)
246                 ? LanguageHelpers.iso639DashTwoCode(trimmedValue)
247                 : trimmedValue;
248     }
249 
250     public static List<Map<String, Object>> extractSubNodes(String type, Map<String, Object> data) {
251         List<Map<String, Object>> out = Lists.newArrayList();
252         Object nodes = data.get(type);
253         if (nodes != null && nodes instanceof List) {
254             for (Map<String, Object> event : (List<Map<String, Object>>) nodes) {
255                 out.add(getSubNode(event));
256             }
257         }
258         return out;
259     }
260 
261     // Helpers
262 
263     private static NodeProperties loadNodeProperties() {
264         try (InputStream fis = ImportHelpers.class.getClassLoader().getResourceAsStream(NODE_PROPERTIES);
265              BufferedReader br = new BufferedReader(new InputStreamReader(fis, Charsets.UTF_8))) {
266             NodeProperties nodeProperties = new NodeProperties();
267             String headers = br.readLine();
268             nodeProperties.setTitles(headers);
269 
270             String line;
271             while ((line = br.readLine()) != null) {
272                 nodeProperties.addRow(line);
273             }
274             return nodeProperties;
275         } catch (IOException ex) {
276             throw new RuntimeException(ex);
277         } catch (NullPointerException npe) {
278             throw new RuntimeException("Missing or empty properties file: " + NODE_PROPERTIES);
279         }
280     }
281 }