View Javadoc

1   /*
2    * Copyright 2015 Data Archiving and Networked Services (an institute of
3    * Koninklijke Nederlandse Akademie van Wetenschappen), King's College London,
4    * Georg-August-Universitaet Goettingen Stiftung Oeffentlichen Rechts
5    *
6    * Licensed under the EUPL, Version 1.1 or – as soon they will be approved by
7    * the European Commission - subsequent versions of the EUPL (the "Licence");
8    * You may not use this work except in compliance with the Licence.
9    * You may obtain a copy of the Licence at:
10   *
11   * https://joinup.ec.europa.eu/software/page/eupl
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the Licence is distributed on an "AS IS" basis,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the Licence for the specific language governing
17   * permissions and limitations under the Licence.
18   */
19  
20  package eu.ehri.project.importers.xml;
21  
22  import com.google.common.collect.ImmutableMap;
23  import eu.ehri.project.definitions.Entities;
24  import eu.ehri.project.definitions.Ontology;
25  import eu.ehri.project.exceptions.ValidationError;
26  import eu.ehri.project.importers.base.ItemImporter;
27  import eu.ehri.project.importers.base.SaxXmlHandler;
28  import eu.ehri.project.importers.ead.EadImporter;
29  import eu.ehri.project.importers.properties.XmlImportProperties;
30  import eu.ehri.project.importers.util.ImportHelpers;
31  import eu.ehri.project.models.Annotation;
32  import eu.ehri.project.models.base.Entity;
33  import org.slf4j.Logger;
34  import org.slf4j.LoggerFactory;
35  import org.xml.sax.SAXException;
36  
37  import java.util.Map;
38  import java.util.Stack;
39  
40  /**
41   * Dublin Core files are imported directly beneath the scope provided.
42   * There is NO structure beneath that.
43   */
44  public class DcEuropeanaHandler extends SaxXmlHandler {
45  
46      private static final Logger logger = LoggerFactory.getLogger(DcEuropeanaHandler.class);
47      private final ImmutableMap<String, Class<? extends Entity>> possibleSubNodes = ImmutableMap.of(
48              Entities.ACCESS_POINT, Annotation.class
49      );
50  
51      public DcEuropeanaHandler(ItemImporter<Map<String, Object>, ?> importer, XmlImportProperties xmlImportProperties) {
52          super(importer, xmlImportProperties);
53      }
54  
55  
56      @Override
57      public void endElement(String uri, String localName, String qName) throws SAXException {
58          //the child closes, add the new DocUnit to the list, establish some relations
59          super.endElement(uri, localName, qName);
60  
61          if (needToCreateSubNode(qName)) {
62              Map<String, Object> currentMap = currentGraphPath.pop();
63              if (isUnitDelimiter(qName)) {
64                  try {
65                      //we're back at the top. find the maintenanceevents and add to the topLevel DU
66                      currentMap.put("languageCode", "nld");
67  
68                      extractIdentifier(currentMap);
69                      extractName(currentMap);
70  
71                      ImportHelpers.putPropertyInGraph(currentMap, "sourceFileId", currentMap.get(ImportHelpers.OBJECT_IDENTIFIER).toString());
72                      importer.importItem(currentMap, new Stack<>());
73  //                importer.importTopLevelExtraNodes(topLevel, current);
74                      //importer.importItem(currentGraphPath.pop(), Lists.<String>newArrayList());
75                  } catch (ValidationError ex) {
76                      logger.error(ex.getMessage());
77                  }
78              } else {
79                  putSubGraphInCurrentGraph(getMappedProperty(currentPath), currentMap);
80                  depth--;
81              }
82          }
83  
84          currentPath.pop();
85      }
86  
87      @Override
88      protected boolean needToCreateSubNode(String qName) {
89          boolean need = isUnitDelimiter(qName);
90          need = need || possibleSubNodes.containsKey(getMappedProperty(currentPath));
91          String path = getMappedProperty(currentPath);
92          logger.debug(path);
93          return need || path.endsWith(EadImporter.ACCESS_POINT);
94      }
95  
96  //    private String replaceOpname(String toString) {
97  //        //done especially for the BBWO2 set
98  //         String date =  toString.replace(" (Opname)", "").replace(" (Vrijgegeven)", "").replace(" (circa)", "");
99  //                     //dceuropeana
100 //        Pattern dcdate = Pattern.compile("^(\\d{1,2})-(\\d{1,2})-(\\d{4})$");
101 //        Matcher m = dcdate.matcher(date);
102 //        if(m.matches()){
103 //            date = m.group(3)+"-"+m.group(2)+"-"+m.group(1);
104 //        }
105 //        logger.debug(date);
106 //        return date;
107 //    }
108 
109     private boolean isUnitDelimiter(String qName) {
110         return qName.equals("europeana:record");
111     }
112 
113     private void extractName(Map<String, Object> currentMap) {
114         if (!currentMap.containsKey(Ontology.NAME_KEY) && currentMap.containsKey("scopeAndContent")) {
115             String name;
116             String scope = currentMap.get("scopeAndContent").toString();
117             if (scope.length() > 50) {
118                 if (scope.indexOf(" ", 50) >= 0) {
119                     name = scope.substring(0, scope.indexOf(" ", 50));
120                 } else {
121                     name = scope.substring(0, 50);
122                 }
123                 name += " ...";
124             } else {
125                 name = scope;
126             }
127             currentMap.put(Ontology.NAME_KEY, name);
128         }
129     }
130 
131     private void extractIdentifier(Map<String, Object> currentMap) {
132         if (currentMap.containsKey(ImportHelpers.OBJECT_IDENTIFIER)) {
133             logger.debug(currentMap.get(ImportHelpers.OBJECT_IDENTIFIER) + "");
134             String id = currentMap.get(ImportHelpers.OBJECT_IDENTIFIER).toString();
135             if (id.startsWith("http://www.beeldbankwo2.nl/detail_no.jsp?action=detail&imid=")) {
136                 currentMap.put(ImportHelpers.OBJECT_IDENTIFIER, id.substring(60));
137             }
138         } else {
139             for (String key : currentMap.keySet())
140                 logger.debug(key);
141         }
142     }
143 
144 }