View Javadoc

1   package eu.ehri.project.exporters.ead;
2   
3   import com.google.common.base.Preconditions;
4   import com.google.common.collect.ImmutableList;
5   import com.google.common.collect.ImmutableMap;
6   import com.google.common.collect.Lists;
7   import com.google.common.collect.Maps;
8   import com.typesafe.config.Config;
9   import com.typesafe.config.ConfigFactory;
10  import eu.ehri.project.api.Api;
11  import eu.ehri.project.api.QueryApi;
12  import eu.ehri.project.definitions.ContactInfo;
13  import eu.ehri.project.definitions.Entities;
14  import eu.ehri.project.definitions.EventTypes;
15  import eu.ehri.project.definitions.IsadG;
16  import eu.ehri.project.definitions.Ontology;
17  import eu.ehri.project.exporters.xml.AbstractStreamingXmlExporter;
18  import eu.ehri.project.models.AccessPoint;
19  import eu.ehri.project.models.AccessPointType;
20  import eu.ehri.project.models.Address;
21  import eu.ehri.project.models.DatePeriod;
22  import eu.ehri.project.models.DocumentaryUnit;
23  import eu.ehri.project.models.DocumentaryUnitDescription;
24  import eu.ehri.project.models.Link;
25  import eu.ehri.project.models.Repository;
26  import eu.ehri.project.models.RepositoryDescription;
27  import eu.ehri.project.models.base.Description;
28  import eu.ehri.project.models.base.Entity;
29  import eu.ehri.project.models.cvoc.AuthoritativeItem;
30  import eu.ehri.project.models.events.SystemEvent;
31  import eu.ehri.project.utils.LanguageHelpers;
32  import org.joda.time.DateTime;
33  import org.joda.time.format.DateTimeFormat;
34  import org.joda.time.format.DateTimeFormatter;
35  import org.slf4j.Logger;
36  import org.slf4j.LoggerFactory;
37  
38  import javax.xml.stream.XMLStreamWriter;
39  import java.util.Collections;
40  import java.util.List;
41  import java.util.Map;
42  import java.util.MissingResourceException;
43  import java.util.Optional;
44  import java.util.ResourceBundle;
45  import java.util.Set;
46  import java.util.stream.Collectors;
47  
48  
49  public class Ead2002Exporter extends AbstractStreamingXmlExporter<DocumentaryUnit> implements EadExporter {
50  
51      private static final Logger logger = LoggerFactory.getLogger(Ead2002Exporter.class);
52      private static final Config config = ConfigFactory.load();
53      private static final DateTimeFormatter unitDateNormalFormat = DateTimeFormat.forPattern("YYYYMMdd");
54  
55      private static ResourceBundle i18n = ResourceBundle.getBundle(Ead2002Exporter.class.getName());
56  
57      private static final String DEFAULT_NAMESPACE = "urn:isbn:1-931666-22-9";
58      private static final Map<String, String> NAMESPACES = namespaces(
59              "xlink", "http://www.w3.org/1999/xlink",
60              "xsi", "http://www.w3.org/2001/XMLSchema-instance"
61      );
62  
63      private static final Map<IsadG, String> multiValueTextMappings = ImmutableMap.<IsadG, String>builder()
64              .put(IsadG.archivistNote, "processinfo")
65              .put(IsadG.scopeAndContent, "scopecontent")
66              .put(IsadG.systemOfArrangement, "arrangement")
67              .put(IsadG.publicationNote, "bibliography")
68              .put(IsadG.locationOfCopies, "altformavail")
69              .put(IsadG.locationOfOriginals, "originalsloc")
70              .put(IsadG.biographicalHistory, "bioghist")
71              .put(IsadG.conditionsOfAccess, "accessrestrict")
72              .put(IsadG.conditionsOfReproduction, "userestrict")
73              .put(IsadG.findingAids, "otherfindaid")
74              .put(IsadG.accruals, "accruals")
75              .put(IsadG.acquisition, "acqinfo")
76              .put(IsadG.appraisal, "appraisal")
77              .put(IsadG.archivalHistory, "custodhist")
78              .put(IsadG.physicalCharacteristics, "phystech")
79              .put(IsadG.relatedUnitsOfDescription, "relatedmaterial")
80              .put(IsadG.separatedUnitsOfDescription, "separatedmaterial")
81              .put(IsadG.notes, "odd") // controversial!
82              .build();
83  
84      private static final Map<IsadG, String> textDidMappings = ImmutableMap.<IsadG, String>builder()
85              .put(IsadG.extentAndMedium, "physdesc")
86              .put(IsadG.unitDates, "unitdate")
87              .build();
88  
89      private static final Map<AccessPointType, String> controlAccessMappings = ImmutableMap.<AccessPointType, String>builder()
90              .put(AccessPointType.subject, "subject")
91              .put(AccessPointType.person, "persname")
92              .put(AccessPointType.family, "famname")
93              .put(AccessPointType.corporateBody, "corpname")
94              .put(AccessPointType.place, "geogname")
95              .put(AccessPointType.genre, "genreform")
96              .build();
97  
98      private static final List<ContactInfo> addressKeys = ImmutableList
99              .of(ContactInfo.street,
100                     ContactInfo.postalCode,
101                     ContactInfo.municipality,
102                     ContactInfo.firstdem,
103                     ContactInfo.countryCode,
104                     ContactInfo.telephone,
105                     ContactInfo.fax,
106                     ContactInfo.webpage,
107                     ContactInfo.email);
108 
109     private final Api api;
110 
111     public Ead2002Exporter(Api api) {
112         this.api = api;
113     }
114 
115     @Override
116     public void export(XMLStreamWriter sw, DocumentaryUnit unit, String langCode) {
117 
118         root(sw, "ead", DEFAULT_NAMESPACE, attrs(), NAMESPACES, () -> {
119             attribute(sw, "http://www.w3.org/2001/XMLSchema-instance",
120                     "schemaLocation", DEFAULT_NAMESPACE + " http://www.loc.gov/ead/ead.xsd");
121 
122             Repository repository = unit.getRepository();
123             Optional<Description> descOpt = LanguageHelpers.getBestDescription(
124                     unit, Optional.empty(), langCode);
125 
126             tag(sw, "eadheader", attrs("relatedencoding", "DC",
127                     "scriptencoding", "iso15924",
128                     "repositoryencoding", "iso15511",
129                     "dateencoding", "iso8601",
130                     "countryencoding", "iso3166-1"), () -> {
131 
132                 tag(sw, "eadid", unit.getId());
133                 descOpt.ifPresent(desc -> {
134                     addFileDesc(sw, langCode, repository, desc);
135                     addProfileDesc(sw, desc);
136                 });
137                 addRevisionDesc(sw, unit);
138             });
139 
140             descOpt.ifPresent(desc -> {
141                 tag(sw, "archdesc", getLevelAttrs(descOpt, "collection"), () -> {
142                     addDataSection(sw, repository, unit, desc, langCode);
143                     addPropertyValues(sw, desc);
144                     Iterable<DocumentaryUnit> orderedChildren = getOrderedChildren(unit);
145                     if (orderedChildren.iterator().hasNext()) {
146                         tag(sw, "dsc", () -> {
147                             for (DocumentaryUnit child : orderedChildren) {
148                                 addEadLevel(sw, 1, child, descOpt, langCode);
149                             }
150                         });
151                     }
152                     addControlAccess(sw, desc);
153                 });
154             });
155         });
156     }
157 
158     private void addProfileDesc(XMLStreamWriter sw, Description desc) {
159         tag(sw, "profiledesc", () -> {
160             tag(sw, "creation", () -> {
161                 characters(sw, resourceAsString("export-boilerplate.txt"));
162                 DateTime now = DateTime.now();
163                 tag(sw, "date", now.toString(), attrs("normal", unitDateNormalFormat.print(now)
164                 ));
165             });
166             tag(sw, "langusage", () -> tag(sw, "language",
167                     LanguageHelpers.codeToName(desc.getLanguageOfDescription()),
168                     attrs("langcode", desc.getLanguageOfDescription())
169             ));
170             Optional.ofNullable(desc.<String>getProperty(IsadG.rulesAndConventions)).ifPresent(value ->
171                     tag(sw, "descrules", value, attrs("encodinganalog", "3.7.2"))
172             );
173         });
174     }
175 
176     private void addFileDesc(XMLStreamWriter sw, String langCode, Repository repository, Description desc) {
177         tag(sw, "filedesc", () -> {
178             tag(sw, "titlestmt", () -> tag(sw, "titleproper", desc.getName()));
179             tag(sw, "publicationstmt", () -> {
180                 LanguageHelpers.getBestDescription(
181                         repository, Optional.empty(), langCode).ifPresent(repoDesc -> {
182                     tag(sw, "publisher", repoDesc.getName());
183                     for (Address address : repoDesc.as(RepositoryDescription.class).getAddresses()) {
184                         tag(sw, "address", () -> {
185                             for (ContactInfo key : addressKeys) {
186                                 for (Object v : coerceList(address.getProperty(key))) {
187                                     tag(sw, "addressline", v.toString());
188                                 }
189                             }
190                             tag(sw, "addressline",
191                                     LanguageHelpers.countryCodeToName(
192                                             repository.getCountry().getId()));
193                         });
194                     }
195                 });
196             });
197             if (Description.CreationProcess.IMPORT.equals(desc.getCreationProcess())) {
198                 tag(sw, ImmutableList.of("notestmt", "note", "p"), resourceAsString("creationprocess-boilerplate.txt"));
199             }
200         });
201     }
202 
203     private void addRevisionDesc(XMLStreamWriter sw, DocumentaryUnit unit) {
204         if (config.getBoolean("io.export.ead.includeRevisions")) {
205             List<List<SystemEvent>> eventList = Lists.newArrayList(api.events().aggregateForItem(unit));
206             if (!eventList.isEmpty()) {
207                 tag(sw, "revisiondesc", () -> {
208                     for (List<SystemEvent> agg : eventList) {
209                         SystemEvent event = agg.get(0);
210                         String eventDesc = getEventDescription(event.getEventType());
211                         tag(sw, "change", () -> {
212                             tag(sw, "date", new DateTime(event.getTimestamp()).toString());
213                             if (event.getLogMessage() == null || event.getLogMessage().isEmpty()) {
214                                 tag(sw, "item", eventDesc);
215                             } else {
216                                 tag(sw, "item", String.format("%s [%s]",
217                                         event.getLogMessage(), eventDesc));
218                             }
219                         });
220                     }
221                 });
222             }
223         }
224     }
225 
226     private void addDataSection(XMLStreamWriter sw, Repository repository, DocumentaryUnit subUnit,
227             Description desc, String langCode) {
228         tag(sw, "did", () -> {
229             tag(sw, "unitid", subUnit.getIdentifier());
230             tag(sw, "unittitle", desc.getName(), attrs("encodinganalog", "3.1.2"));
231 
232             for (DatePeriod datePeriod : desc.as(DocumentaryUnitDescription.class).getDatePeriods()) {
233                 if (DatePeriod.DatePeriodType.creation.equals(datePeriod.getDateType())) {
234                     String start = datePeriod.getStartDate();
235                     String end = datePeriod.getEndDate();
236                     if (start != null && end != null) {
237                         DateTime startDateTime = new DateTime(start);
238                         DateTime endDateTime = new DateTime(end);
239                         String normal = String.format("%s/%s",
240                                 unitDateNormalFormat.print(startDateTime),
241                                 unitDateNormalFormat.print(endDateTime));
242                         String text = String.format("%s/%s",
243                                 startDateTime.year().get(), endDateTime.year().get());
244                         tag(sw, "unitdate", text, attrs("normal", normal, "encodinganalog", "3.1.3"));
245                     } else if (start != null) {
246                         DateTime startDateTime = new DateTime(start);
247                         String normal = String.format("%s",
248                                 unitDateNormalFormat.print(startDateTime));
249                         String text = String.format("%s", startDateTime.year().get());
250                         tag(sw, "unitdate", text, attrs("normal", normal, "encodinganalog", "3.1.3"));
251                     }
252                 }
253             }
254 
255             Set<String> propertyKeys = desc.getPropertyKeys();
256             for (Map.Entry<IsadG, String> pair : textDidMappings.entrySet()) {
257                 if (propertyKeys.contains(pair.getKey().name())) {
258                     for (Object v : coerceList(desc.getProperty(pair.getKey()))) {
259                         tag(sw, pair.getValue(), v.toString(), textFieldAttrs(pair.getKey()));
260                     }
261                 }
262             }
263 
264             if (propertyKeys.contains(IsadG.languageOfMaterial.name())) {
265                 tag(sw, "langmaterial", () -> {
266                     for (Object v : coerceList(desc.getProperty(IsadG.languageOfMaterial))) {
267                         String langName = LanguageHelpers.codeToName(v.toString());
268                         if (v.toString().length() != 3) {
269                             tag(sw, "language", langName, textFieldAttrs(IsadG.languageOfMaterial));
270                         } else {
271                             tag(sw, "language", langName, textFieldAttrs(IsadG.languageOfMaterial, "langcode", v
272                                     .toString()));
273                         }
274                     }
275                 });
276             }
277 
278             Optional.ofNullable(repository).ifPresent(repo -> {
279                 LanguageHelpers.getBestDescription(repo, Optional.empty(), langCode).ifPresent(repoDesc ->
280                         tag(sw, "repository", () ->
281                                 tag(sw, "corpname", repoDesc.getName()))
282                 );
283             });
284         });
285     }
286 
287     private void addEadLevel(XMLStreamWriter sw, int num, DocumentaryUnit subUnit,
288             Optional<Description> priorDescOpt, String langCode) {
289         logger.trace("Adding EAD sublevel: c" + num);
290         Optional<Description> descOpt = LanguageHelpers.getBestDescription(subUnit, priorDescOpt, langCode);
291         String levelTag = String.format("c%02d", num);
292         tag(sw, levelTag, getLevelAttrs(descOpt, null), () -> {
293             descOpt.ifPresent(desc -> {
294                 addDataSection(sw, null, subUnit, desc, langCode);
295                 addPropertyValues(sw, desc);
296                 addControlAccess(sw, desc);
297             });
298 
299             for (DocumentaryUnit child : getOrderedChildren(subUnit)) {
300                 addEadLevel(sw, num + 1, child, descOpt, langCode);
301             }
302         });
303     }
304 
305     private void addControlAccess(XMLStreamWriter sw, Description desc) {
306         Map<AccessPointType, List<AccessPoint>> byType = Maps.newHashMap();
307         for (AccessPoint accessPoint : desc.getAccessPoints()) {
308             AccessPointType type = accessPoint.getRelationshipType();
309             if (controlAccessMappings.containsKey(type)) {
310                 if (byType.containsKey(type)) {
311                     byType.get(type).add(accessPoint);
312                 } else {
313                     byType.put(type, Lists.newArrayList(accessPoint));
314                 }
315             }
316         }
317 
318         for (Map.Entry<AccessPointType, List<AccessPoint>> entry : byType.entrySet()) {
319             tag(sw, "controlaccess", () -> {
320                 AccessPointType type = entry.getKey();
321                 for (AccessPoint accessPoint : entry.getValue()) {
322                     tag(sw, controlAccessMappings.get(type), accessPoint.getName(),
323                             getAccessPointAttributes(accessPoint));
324                 }
325             });
326         }
327     }
328 
329     private Map<String, String> getAccessPointAttributes(AccessPoint accessPoint) {
330         for (Link link : accessPoint.getLinks()) {
331             for (Entity target : link.getLinkTargets()) {
332                 if (target.getType().equals(Entities.CVOC_CONCEPT) ||
333                         target.getType().equals(Entities.HISTORICAL_AGENT)) {
334                     AuthoritativeItem item = target.as(AuthoritativeItem.class);
335                     try {
336                         return ImmutableMap.of(
337                                 "source", item.getAuthoritativeSet().getId(),
338                                 "authfilenumber", item.getIdentifier()
339                         );
340                     } catch (NullPointerException e) {
341                         logger.warn("Authoritative item with missing set: {}", item.getId());
342                     }
343                 }
344             }
345         }
346         return Collections.emptyMap();
347     }
348 
349     private void addPropertyValues(XMLStreamWriter sw, Entity item) {
350         Set<String> available = item.getPropertyKeys();
351         for (Map.Entry<IsadG, String> pair : multiValueTextMappings.entrySet()) {
352             if (available.contains(pair.getKey().name())) {
353                 for (Object v : coerceList(item.getProperty(pair.getKey()))) {
354                     tag(sw, pair.getValue(), textFieldAttrs(pair.getKey()),
355                             () -> tag(sw, "p", () -> cData(sw, v.toString()))
356                     );
357                 }
358             }
359         }
360         for (Object v : coerceList(item.getProperty(IsadG.datesOfDescriptions))) {
361             tag(sw, "processinfo", textFieldAttrs(IsadG.datesOfDescriptions), () -> {
362                 tag(sw, Lists.newArrayList("p", "date"), () -> cData(sw, v.toString()));
363             });
364         }
365         if (available.contains(IsadG.sources.name())) {
366             tag(sw, "processinfo", textFieldAttrs(IsadG.sources, "type", "Sources"), () -> {
367                 tag(sw, "p", () -> {
368                     for (Object v : coerceList(item.getProperty(IsadG.sources))) {
369                         tag(sw, "bibref", () -> cData(sw, v.toString()));
370                     }
371                 });
372             });
373         }
374     }
375 
376     private Map<String, String> textFieldAttrs(IsadG field, String... kvs) {
377         Preconditions.checkArgument(kvs.length % 2 == 0);
378         Map<String, String> attrs = field.getAnalogueEncoding()
379                 .map(Collections::singleton)
380                 .orElse(Collections.emptySet())
381                 .stream().collect(Collectors.toMap(e -> "encodinganalog", e -> e));
382         for (int i = 0; i < kvs.length; i += 2) {
383             attrs.put(kvs[0], kvs[i + 1]);
384         }
385         return attrs;
386     }
387 
388     private Map<String, String> getLevelAttrs(Optional<Description> descOpt, String defaultLevel) {
389         String level = descOpt
390                 .map(d -> d.<String>getProperty(IsadG.levelOfDescription))
391                 .orElse(defaultLevel);
392         return level != null ? ImmutableMap.of("level", level) : Collections.emptyMap();
393     }
394 
395     // Sort the children by identifier. FIXME: This might be a bad assumption!
396     private Iterable<DocumentaryUnit> getOrderedChildren(DocumentaryUnit unit) {
397         return api
398                 .query()
399                 .orderBy(Ontology.IDENTIFIER_KEY, QueryApi.Sort.ASC)
400                 .setLimit(-1)
401                 .setStream(true)
402                 .page(unit.getChildren(), DocumentaryUnit.class);
403     }
404 
405     private String getEventDescription(EventTypes eventType) {
406         try {
407             return i18n.getString(eventType.name());
408         } catch (MissingResourceException e) {
409             return eventType.name();
410         }
411     }
412 }