1 package eu.ehri.project.exporters.ead;
2
3 import com.google.common.base.Preconditions;
4 import com.google.common.collect.ImmutableList;
5 import com.google.common.collect.ImmutableMap;
6 import com.google.common.collect.Lists;
7 import com.google.common.collect.Maps;
8 import com.typesafe.config.Config;
9 import com.typesafe.config.ConfigFactory;
10 import eu.ehri.project.api.Api;
11 import eu.ehri.project.api.QueryApi;
12 import eu.ehri.project.definitions.ContactInfo;
13 import eu.ehri.project.definitions.Entities;
14 import eu.ehri.project.definitions.EventTypes;
15 import eu.ehri.project.definitions.IsadG;
16 import eu.ehri.project.definitions.Ontology;
17 import eu.ehri.project.exporters.xml.AbstractStreamingXmlExporter;
18 import eu.ehri.project.models.AccessPoint;
19 import eu.ehri.project.models.AccessPointType;
20 import eu.ehri.project.models.Address;
21 import eu.ehri.project.models.DatePeriod;
22 import eu.ehri.project.models.DocumentaryUnit;
23 import eu.ehri.project.models.DocumentaryUnitDescription;
24 import eu.ehri.project.models.Link;
25 import eu.ehri.project.models.Repository;
26 import eu.ehri.project.models.RepositoryDescription;
27 import eu.ehri.project.models.base.Description;
28 import eu.ehri.project.models.base.Entity;
29 import eu.ehri.project.models.cvoc.AuthoritativeItem;
30 import eu.ehri.project.models.events.SystemEvent;
31 import eu.ehri.project.utils.LanguageHelpers;
32 import org.joda.time.DateTime;
33 import org.joda.time.format.DateTimeFormat;
34 import org.joda.time.format.DateTimeFormatter;
35 import org.slf4j.Logger;
36 import org.slf4j.LoggerFactory;
37
38 import javax.xml.stream.XMLStreamWriter;
39 import java.util.Collections;
40 import java.util.List;
41 import java.util.Map;
42 import java.util.MissingResourceException;
43 import java.util.Optional;
44 import java.util.ResourceBundle;
45 import java.util.Set;
46 import java.util.stream.Collectors;
47
48
49 public class Ead2002Exporter extends AbstractStreamingXmlExporter<DocumentaryUnit> implements EadExporter {
50
private static final Logger logger = LoggerFactory.getLogger(Ead2002Exporter.class);
private static final Config config = ConfigFactory.load();

/** Formatter used for the {@code normal} attribute of emitted {@code date} and {@code unitdate} elements. */
private static final DateTimeFormatter unitDateNormalFormat = DateTimeFormat.forPattern("YYYYMMdd");

/**
 * Bundle looked up by event type name when describing revisions. It is
 * assigned exactly once and only ever read, hence {@code final}.
 */
private static final ResourceBundle i18n = ResourceBundle.getBundle(Ead2002Exporter.class.getName());

/** Default namespace of the root {@code ead} element; also used in the schema location. */
private static final String DEFAULT_NAMESPACE = "urn:isbn:1-931666-22-9";

/** Additional prefix/URI namespace pairs declared on the root element. */
private static final Map<String, String> NAMESPACES = namespaces(
        "xlink", "http://www.w3.org/1999/xlink",
        "xsi", "http://www.w3.org/2001/XMLSchema-instance"
);

/**
 * ISAD(G) fields written as their own element wrapping a {@code p} child.
 * Iteration order of this map is the order in which elements are written.
 */
private static final Map<IsadG, String> multiValueTextMappings = ImmutableMap.<IsadG, String>builder()
        .put(IsadG.archivistNote, "processinfo")
        .put(IsadG.scopeAndContent, "scopecontent")
        .put(IsadG.systemOfArrangement, "arrangement")
        .put(IsadG.publicationNote, "bibliography")
        .put(IsadG.locationOfCopies, "altformavail")
        .put(IsadG.locationOfOriginals, "originalsloc")
        .put(IsadG.biographicalHistory, "bioghist")
        .put(IsadG.conditionsOfAccess, "accessrestrict")
        .put(IsadG.conditionsOfReproduction, "userestrict")
        .put(IsadG.findingAids, "otherfindaid")
        .put(IsadG.accruals, "accruals")
        .put(IsadG.acquisition, "acqinfo")
        .put(IsadG.appraisal, "appraisal")
        .put(IsadG.archivalHistory, "custodhist")
        .put(IsadG.physicalCharacteristics, "phystech")
        .put(IsadG.relatedUnitsOfDescription, "relatedmaterial")
        .put(IsadG.separatedUnitsOfDescription, "separatedmaterial")
        .put(IsadG.notes, "odd")
        .build();

/** ISAD(G) fields written as plain text elements inside {@code did}. */
private static final Map<IsadG, String> textDidMappings = ImmutableMap.<IsadG, String>builder()
        .put(IsadG.extentAndMedium, "physdesc")
        .put(IsadG.unitDates, "unitdate")
        .build();

/** Element name used inside {@code controlaccess} for each supported access point type. */
private static final Map<AccessPointType, String> controlAccessMappings = ImmutableMap.<AccessPointType, String>builder()
        .put(AccessPointType.subject, "subject")
        .put(AccessPointType.person, "persname")
        .put(AccessPointType.family, "famname")
        .put(AccessPointType.corporateBody, "corpname")
        .put(AccessPointType.place, "geogname")
        .put(AccessPointType.genre, "genreform")
        .build();

/** Address properties written as {@code addressline} elements, in this order. */
private static final List<ContactInfo> addressKeys = ImmutableList
        .of(ContactInfo.street,
                ContactInfo.postalCode,
                ContactInfo.municipality,
                ContactInfo.firstdem,
                ContactInfo.countryCode,
                ContactInfo.telephone,
                ContactInfo.fax,
                ContactInfo.webpage,
                ContactInfo.email);

/** API handle used for querying child units and aggregating events. */
private final Api api;

/**
 * Creates an exporter backed by the given API.
 *
 * @param api the API used to query children and events of exported units
 */
public Ead2002Exporter(Api api) {
    this.api = api;
}
114
115 @Override
116 public void export(XMLStreamWriter sw, DocumentaryUnit unit, String langCode) {
117
118 root(sw, "ead", DEFAULT_NAMESPACE, attrs(), NAMESPACES, () -> {
119 attribute(sw, "http://www.w3.org/2001/XMLSchema-instance",
120 "schemaLocation", DEFAULT_NAMESPACE + " http://www.loc.gov/ead/ead.xsd");
121
122 Repository repository = unit.getRepository();
123 Optional<Description> descOpt = LanguageHelpers.getBestDescription(
124 unit, Optional.empty(), langCode);
125
126 tag(sw, "eadheader", attrs("relatedencoding", "DC",
127 "scriptencoding", "iso15924",
128 "repositoryencoding", "iso15511",
129 "dateencoding", "iso8601",
130 "countryencoding", "iso3166-1"), () -> {
131
132 tag(sw, "eadid", unit.getId());
133 descOpt.ifPresent(desc -> {
134 addFileDesc(sw, langCode, repository, desc);
135 addProfileDesc(sw, desc);
136 });
137 addRevisionDesc(sw, unit);
138 });
139
140 descOpt.ifPresent(desc -> {
141 tag(sw, "archdesc", getLevelAttrs(descOpt, "collection"), () -> {
142 addDataSection(sw, repository, unit, desc, langCode);
143 addPropertyValues(sw, desc);
144 Iterable<DocumentaryUnit> orderedChildren = getOrderedChildren(unit);
145 if (orderedChildren.iterator().hasNext()) {
146 tag(sw, "dsc", () -> {
147 for (DocumentaryUnit child : orderedChildren) {
148 addEadLevel(sw, 1, child, descOpt, langCode);
149 }
150 });
151 }
152 addControlAccess(sw, desc);
153 });
154 });
155 });
156 }
157
158 private void addProfileDesc(XMLStreamWriter sw, Description desc) {
159 tag(sw, "profiledesc", () -> {
160 tag(sw, "creation", () -> {
161 characters(sw, resourceAsString("export-boilerplate.txt"));
162 DateTime now = DateTime.now();
163 tag(sw, "date", now.toString(), attrs("normal", unitDateNormalFormat.print(now)
164 ));
165 });
166 tag(sw, "langusage", () -> tag(sw, "language",
167 LanguageHelpers.codeToName(desc.getLanguageOfDescription()),
168 attrs("langcode", desc.getLanguageOfDescription())
169 ));
170 Optional.ofNullable(desc.<String>getProperty(IsadG.rulesAndConventions)).ifPresent(value ->
171 tag(sw, "descrules", value, attrs("encodinganalog", "3.7.2"))
172 );
173 });
174 }
175
176 private void addFileDesc(XMLStreamWriter sw, String langCode, Repository repository, Description desc) {
177 tag(sw, "filedesc", () -> {
178 tag(sw, "titlestmt", () -> tag(sw, "titleproper", desc.getName()));
179 tag(sw, "publicationstmt", () -> {
180 LanguageHelpers.getBestDescription(
181 repository, Optional.empty(), langCode).ifPresent(repoDesc -> {
182 tag(sw, "publisher", repoDesc.getName());
183 for (Address address : repoDesc.as(RepositoryDescription.class).getAddresses()) {
184 tag(sw, "address", () -> {
185 for (ContactInfo key : addressKeys) {
186 for (Object v : coerceList(address.getProperty(key))) {
187 tag(sw, "addressline", v.toString());
188 }
189 }
190 tag(sw, "addressline",
191 LanguageHelpers.countryCodeToName(
192 repository.getCountry().getId()));
193 });
194 }
195 });
196 });
197 if (Description.CreationProcess.IMPORT.equals(desc.getCreationProcess())) {
198 tag(sw, ImmutableList.of("notestmt", "note", "p"), resourceAsString("creationprocess-boilerplate.txt"));
199 }
200 });
201 }
202
203 private void addRevisionDesc(XMLStreamWriter sw, DocumentaryUnit unit) {
204 if (config.getBoolean("io.export.ead.includeRevisions")) {
205 List<List<SystemEvent>> eventList = Lists.newArrayList(api.events().aggregateForItem(unit));
206 if (!eventList.isEmpty()) {
207 tag(sw, "revisiondesc", () -> {
208 for (List<SystemEvent> agg : eventList) {
209 SystemEvent event = agg.get(0);
210 String eventDesc = getEventDescription(event.getEventType());
211 tag(sw, "change", () -> {
212 tag(sw, "date", new DateTime(event.getTimestamp()).toString());
213 if (event.getLogMessage() == null || event.getLogMessage().isEmpty()) {
214 tag(sw, "item", eventDesc);
215 } else {
216 tag(sw, "item", String.format("%s [%s]",
217 event.getLogMessage(), eventDesc));
218 }
219 });
220 }
221 });
222 }
223 }
224 }
225
226 private void addDataSection(XMLStreamWriter sw, Repository repository, DocumentaryUnit subUnit,
227 Description desc, String langCode) {
228 tag(sw, "did", () -> {
229 tag(sw, "unitid", subUnit.getIdentifier());
230 tag(sw, "unittitle", desc.getName(), attrs("encodinganalog", "3.1.2"));
231
232 for (DatePeriod datePeriod : desc.as(DocumentaryUnitDescription.class).getDatePeriods()) {
233 if (DatePeriod.DatePeriodType.creation.equals(datePeriod.getDateType())) {
234 String start = datePeriod.getStartDate();
235 String end = datePeriod.getEndDate();
236 if (start != null && end != null) {
237 DateTime startDateTime = new DateTime(start);
238 DateTime endDateTime = new DateTime(end);
239 String normal = String.format("%s/%s",
240 unitDateNormalFormat.print(startDateTime),
241 unitDateNormalFormat.print(endDateTime));
242 String text = String.format("%s/%s",
243 startDateTime.year().get(), endDateTime.year().get());
244 tag(sw, "unitdate", text, attrs("normal", normal, "encodinganalog", "3.1.3"));
245 } else if (start != null) {
246 DateTime startDateTime = new DateTime(start);
247 String normal = String.format("%s",
248 unitDateNormalFormat.print(startDateTime));
249 String text = String.format("%s", startDateTime.year().get());
250 tag(sw, "unitdate", text, attrs("normal", normal, "encodinganalog", "3.1.3"));
251 }
252 }
253 }
254
255 Set<String> propertyKeys = desc.getPropertyKeys();
256 for (Map.Entry<IsadG, String> pair : textDidMappings.entrySet()) {
257 if (propertyKeys.contains(pair.getKey().name())) {
258 for (Object v : coerceList(desc.getProperty(pair.getKey()))) {
259 tag(sw, pair.getValue(), v.toString(), textFieldAttrs(pair.getKey()));
260 }
261 }
262 }
263
264 if (propertyKeys.contains(IsadG.languageOfMaterial.name())) {
265 tag(sw, "langmaterial", () -> {
266 for (Object v : coerceList(desc.getProperty(IsadG.languageOfMaterial))) {
267 String langName = LanguageHelpers.codeToName(v.toString());
268 if (v.toString().length() != 3) {
269 tag(sw, "language", langName, textFieldAttrs(IsadG.languageOfMaterial));
270 } else {
271 tag(sw, "language", langName, textFieldAttrs(IsadG.languageOfMaterial, "langcode", v
272 .toString()));
273 }
274 }
275 });
276 }
277
278 Optional.ofNullable(repository).ifPresent(repo -> {
279 LanguageHelpers.getBestDescription(repo, Optional.empty(), langCode).ifPresent(repoDesc ->
280 tag(sw, "repository", () ->
281 tag(sw, "corpname", repoDesc.getName()))
282 );
283 });
284 });
285 }
286
287 private void addEadLevel(XMLStreamWriter sw, int num, DocumentaryUnit subUnit,
288 Optional<Description> priorDescOpt, String langCode) {
289 logger.trace("Adding EAD sublevel: c" + num);
290 Optional<Description> descOpt = LanguageHelpers.getBestDescription(subUnit, priorDescOpt, langCode);
291 String levelTag = String.format("c%02d", num);
292 tag(sw, levelTag, getLevelAttrs(descOpt, null), () -> {
293 descOpt.ifPresent(desc -> {
294 addDataSection(sw, null, subUnit, desc, langCode);
295 addPropertyValues(sw, desc);
296 addControlAccess(sw, desc);
297 });
298
299 for (DocumentaryUnit child : getOrderedChildren(subUnit)) {
300 addEadLevel(sw, num + 1, child, descOpt, langCode);
301 }
302 });
303 }
304
305 private void addControlAccess(XMLStreamWriter sw, Description desc) {
306 Map<AccessPointType, List<AccessPoint>> byType = Maps.newHashMap();
307 for (AccessPoint accessPoint : desc.getAccessPoints()) {
308 AccessPointType type = accessPoint.getRelationshipType();
309 if (controlAccessMappings.containsKey(type)) {
310 if (byType.containsKey(type)) {
311 byType.get(type).add(accessPoint);
312 } else {
313 byType.put(type, Lists.newArrayList(accessPoint));
314 }
315 }
316 }
317
318 for (Map.Entry<AccessPointType, List<AccessPoint>> entry : byType.entrySet()) {
319 tag(sw, "controlaccess", () -> {
320 AccessPointType type = entry.getKey();
321 for (AccessPoint accessPoint : entry.getValue()) {
322 tag(sw, controlAccessMappings.get(type), accessPoint.getName(),
323 getAccessPointAttributes(accessPoint));
324 }
325 });
326 }
327 }
328
329 private Map<String, String> getAccessPointAttributes(AccessPoint accessPoint) {
330 for (Link link : accessPoint.getLinks()) {
331 for (Entity target : link.getLinkTargets()) {
332 if (target.getType().equals(Entities.CVOC_CONCEPT) ||
333 target.getType().equals(Entities.HISTORICAL_AGENT)) {
334 AuthoritativeItem item = target.as(AuthoritativeItem.class);
335 try {
336 return ImmutableMap.of(
337 "source", item.getAuthoritativeSet().getId(),
338 "authfilenumber", item.getIdentifier()
339 );
340 } catch (NullPointerException e) {
341 logger.warn("Authoritative item with missing set: {}", item.getId());
342 }
343 }
344 }
345 }
346 return Collections.emptyMap();
347 }
348
349 private void addPropertyValues(XMLStreamWriter sw, Entity item) {
350 Set<String> available = item.getPropertyKeys();
351 for (Map.Entry<IsadG, String> pair : multiValueTextMappings.entrySet()) {
352 if (available.contains(pair.getKey().name())) {
353 for (Object v : coerceList(item.getProperty(pair.getKey()))) {
354 tag(sw, pair.getValue(), textFieldAttrs(pair.getKey()),
355 () -> tag(sw, "p", () -> cData(sw, v.toString()))
356 );
357 }
358 }
359 }
360 for (Object v : coerceList(item.getProperty(IsadG.datesOfDescriptions))) {
361 tag(sw, "processinfo", textFieldAttrs(IsadG.datesOfDescriptions), () -> {
362 tag(sw, Lists.newArrayList("p", "date"), () -> cData(sw, v.toString()));
363 });
364 }
365 if (available.contains(IsadG.sources.name())) {
366 tag(sw, "processinfo", textFieldAttrs(IsadG.sources, "type", "Sources"), () -> {
367 tag(sw, "p", () -> {
368 for (Object v : coerceList(item.getProperty(IsadG.sources))) {
369 tag(sw, "bibref", () -> cData(sw, v.toString()));
370 }
371 });
372 });
373 }
374 }
375
376 private Map<String, String> textFieldAttrs(IsadG field, String... kvs) {
377 Preconditions.checkArgument(kvs.length % 2 == 0);
378 Map<String, String> attrs = field.getAnalogueEncoding()
379 .map(Collections::singleton)
380 .orElse(Collections.emptySet())
381 .stream().collect(Collectors.toMap(e -> "encodinganalog", e -> e));
382 for (int i = 0; i < kvs.length; i += 2) {
383 attrs.put(kvs[0], kvs[i + 1]);
384 }
385 return attrs;
386 }
387
388 private Map<String, String> getLevelAttrs(Optional<Description> descOpt, String defaultLevel) {
389 String level = descOpt
390 .map(d -> d.<String>getProperty(IsadG.levelOfDescription))
391 .orElse(defaultLevel);
392 return level != null ? ImmutableMap.of("level", level) : Collections.emptyMap();
393 }
394
395
396 private Iterable<DocumentaryUnit> getOrderedChildren(DocumentaryUnit unit) {
397 return api
398 .query()
399 .orderBy(Ontology.IDENTIFIER_KEY, QueryApi.Sort.ASC)
400 .setLimit(-1)
401 .setStream(true)
402 .page(unit.getChildren(), DocumentaryUnit.class);
403 }
404
405 private String getEventDescription(EventTypes eventType) {
406 try {
407 return i18n.getString(eventType.name());
408 } catch (MissingResourceException e) {
409 return eventType.name();
410 }
411 }
412 }