View Javadoc

1   /*
2    * Copyright 2015 Data Archiving and Networked Services (an institute of
3    * Koninklijke Nederlandse Akademie van Wetenschappen), King's College London,
4    * Georg-August-Universitaet Goettingen Stiftung Oeffentlichen Rechts
5    *
6    * Licensed under the EUPL, Version 1.1 or – as soon they will be approved by
7    * the European Commission - subsequent versions of the EUPL (the "Licence");
8    * You may not use this work except in compliance with the Licence.
9    * You may obtain a copy of the Licence at:
10   *
11   * https://joinup.ec.europa.eu/software/page/eupl
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the Licence is distributed on an "AS IS" basis,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the Licence for the specific language governing
17   * permissions and limitations under the Licence.
18   */
19  
20  package eu.ehri.project.oaipmh;
21  
22  import com.google.common.base.Joiner;
23  import com.google.common.collect.ImmutableList;
24  import com.google.common.collect.Maps;
25  import com.typesafe.config.Config;
26  import eu.ehri.project.api.QueryApi;
27  import eu.ehri.project.exporters.xml.StreamingXmlDsl;
28  import eu.ehri.project.models.DocumentaryUnit;
29  import eu.ehri.project.models.events.SystemEvent;
30  import eu.ehri.project.oaipmh.errors.OaiPmhError;
31  import org.slf4j.Logger;
32  import org.slf4j.LoggerFactory;
33  
34  import javax.xml.stream.XMLStreamWriter;
35  import java.time.LocalDateTime;
36  import java.time.ZonedDateTime;
37  import java.time.format.DateTimeFormatter;
38  import java.time.temporal.ChronoUnit;
39  import java.util.HashMap;
40  import java.util.List;
41  import java.util.Map;
42  
43  
44  public class OaiPmhExporter extends StreamingXmlDsl {
45  
46      private static final Logger log = LoggerFactory.getLogger(OaiPmhExporter.class);
47  
48      private static final String DEFAULT_NAMESPACE = "http://www.openarchives.org/OAI/2.0/";
49  
50      static final DateTimeFormatter DATE_PATTERN = DateTimeFormatter
51              .ofPattern("YYYY-MM-dd'T'hh:mm:ss'Z'");
52  
53      private static final Map<String, String> NAMESPACES = namespaces(
54              "xsi", "http://www.w3.org/2001/XMLSchema-instance"
55      );
56  
57      private static final Map<String, String> DC_NAMESPACES = namespaces(
58              "oai_dc", MetadataPrefix.oai_dc.namespace(),
59              "xsi", "http://www.w3.org/2001/XMLSchema-instance",
60              "dc", "http://purl.org/dc/elements/1.1/");
61  
62      private final OaiPmhRenderer renderer;
63      private final Config config;
64      private final OaiPmhData data;
65  
66      public OaiPmhExporter(OaiPmhData data, OaiPmhRenderer renderer, Config config) {
67          this.renderer = renderer;
68          this.config = config;
69          this.data = data;
70      }
71  
72      public void performVerb(XMLStreamWriter sw, OaiPmhState state) {
73          try {
74              switch (state.getVerb()) {
75                  case Identify:
76                      identify(sw, state);
77                      break;
78                  case ListMetadataFormats:
79                      listMetadataFormats(sw, state);
80                      break;
81                  case ListSets:
82                      listSets(sw, state);
83                      break;
84                  case GetRecord:
85                      getRecord(sw, state);
86                      break;
87                  case ListIdentifiers:
88                      listIdentifiers(sw, state);
89                      break;
90                  case ListRecords:
91                      listRecords(sw, state);
92                      break;
93              }
94          } catch (OaiPmhError e) {
95              error(sw, e.getCode(), e.getMessage(), state);
96          }
97      }
98  
99      private void identify(XMLStreamWriter sw, OaiPmhState state) {
100         withDoc(sw, () -> {
101             preamble(sw, Verb.Identify.name(), state.toMap());
102             tag(sw, Verb.Identify.name(), () -> {
103                 tag(sw, "repositoryName", config.getString("oaipmh.repositoryName"));
104                 tag(sw, "baseURL", config.getString("oaipmh.baseURL"));
105                 tag(sw, "protocolVersion", "2.0");
106                 tag(sw, "adminEmail", config.getString("oaipmh.adminEmail"));
107                 tag(sw, "earliestDatestamp", formatDate(data.getEarliestTimestamp()));
108                 tag(sw, "deletedRecord", "persistent");
109                 tag(sw, "granularity", "YYYY-MM-DDThh:mm:ssZ");
110                 if (config.hasPath("oaipmh.compression")) {
111                     tag(sw, "compression", config.getString("oaipmh.compression"));
112                 }
113             });
114         });
115     }
116 
117     private void listMetadataFormats(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
118         if (state.getIdentifier() != null) {
119             if (data.getRecord(state).isInvalid()) {
120                 throw new OaiPmhError(ErrorCode.idDoesNotExist,
121                         "Identifier does not exist: " + state.getIdentifier());
122             }
123         }
124         withDoc(sw, () -> {
125             preamble(sw, Verb.ListMetadataFormats.name(), state.toMap());
126             tag(sw, Verb.ListMetadataFormats.name(), () -> {
127                 for (MetadataPrefix prefix : MetadataPrefix.values()) {
128                     tag(sw, "metadataFormat", () -> {
129                         tag(sw, "metadataPrefix", prefix.name());
130                         tag(sw, "schema", prefix.schema());
131                         tag(sw, "metadataNamespace", prefix.namespace());
132                     });
133                 }
134             });
135         });
136     }
137 
138     private void listSets(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
139         QueryApi.Page<OaiPmhSet> sets = data.getSets(state);
140         long count = sets.getTotal();
141         Map<String, String> rtAttrs = getResumptionAttrs(sets);
142         withDoc(sw, () -> {
143             preamble(sw, Verb.ListSets.name(), state.toMap());
144             tag(sw, Verb.ListSets.name(), () -> {
145                 for (OaiPmhSet set: sets) {
146                     tag(sw, "set", () -> {
147                         tag(sw, "setSpec", set.getId());
148                         tag(sw, "setName", set.getName());
149                         tag(sw, "setDescription", () -> dcDescription(sw, set.getDescription()));
150                     });
151                 }
152                 if (state.shouldResume(Math.toIntExact(count))) {
153                     tag(sw, "resumptionToken", state.nextState(), rtAttrs);
154                 } else if (state.hasResumed()) {
155                     tag(sw, "resumptionToken", null, rtAttrs);
156                 }
157             });
158         });
159     }
160 
161     private void getRecord(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
162         OaiPmhRecordResult record = data.getRecord(state);
163         record.doc().ifPresent(item ->
164                 withDoc(sw, () -> {
165                     preamble(sw, Verb.GetRecord.name(), state.toMap());
166                     tag(sw, Verb.GetRecord.name(), () ->
167                             tag(sw, "record", () -> {
168                                 tag(sw, "header", () -> writeRecordHeader(sw, state.getIdentifier(), item));
169                                 tag(sw, "metadata", () -> renderer.render(sw, state.getMetadataPrefix(), item));
170                             }));
171                 })
172         );
173 
174         record.deleted().ifPresent(deleted ->
175                 writeDeletedRecord(sw, deleted.getId(),
176                         formatDate(deleted.getDatestamp()), deleted.getSets())
177         );
178 
179         if (record.isInvalid()) {
180             throw new OaiPmhError(ErrorCode.idDoesNotExist,
181                     "ID does not exist: " + state.getIdentifier());
182         }
183     }
184 
185     private void listIdentifiers(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
186         QueryApi.Page<DocumentaryUnit> items = data.getFilteredDocumentaryUnits(state);
187         long count = items.getTotal();
188         Iterable<OaiPmhDeleted> deleted = data.getFilteredDeletedDocumentaryUnits(state);
189         Map<String, String> rtAttrs = getResumptionAttrs(items);
190         if (count == 0 && !deleted.iterator().hasNext()) {
191             throw new OaiPmhError(ErrorCode.noRecordsMatch);
192         }
193 
194         withDoc(sw, () -> {
195             preamble(sw, Verb.ListIdentifiers.name(), state.toMap());
196             tag(sw, Verb.ListIdentifiers.name(), () -> {
197                 for (DocumentaryUnit item : items) {
198                     tag(sw, "header", () -> writeRecordHeader(sw, item.getId(), item));
199                 }
200                 if (state.shouldResume(Math.toIntExact(count))) {
201                     tag(sw, "resumptionToken", state.nextState(), rtAttrs);
202                 } else {
203                     for (OaiPmhDeleted item : deleted) {
204                         writeDeletedRecord(sw, item.getId(), formatDate(item.getDatestamp()), item.getSets());
205                     }
206                     if (state.hasResumed()) {
207                         tag(sw, "resumptionToken", null, rtAttrs);
208                     }
209                 }
210             });
211         });
212     }
213 
214     private void listRecords(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
215         LocalDateTime before = LocalDateTime.now();
216         QueryApi.Page<DocumentaryUnit> items = data.getFilteredDocumentaryUnits(state);
217         long count = items.getTotal();
218         Map<String, String> rtAttrs = getResumptionAttrs(items);
219         Iterable<OaiPmhDeleted> deleted = data.getFilteredDeletedDocumentaryUnits(state);
220         if (count == 0 && !deleted.iterator().hasNext()) {
221             throw new OaiPmhError(ErrorCode.noRecordsMatch);
222         }
223         LocalDateTime after = LocalDateTime.now();
224         log.debug("Fetched {} items in {} millis", count, before.until(after, ChronoUnit.MILLIS));
225 
226         withDoc(sw, () -> {
227             preamble(sw, Verb.ListRecords.name(), state.toMap());
228             tag(sw, Verb.ListRecords.name(), () -> {
229                 for (DocumentaryUnit item : items) {
230                     tag(sw, "record", () -> {
231                         tag(sw, "header", () -> writeRecordHeader(sw, item.getId(), item));
232                         tag(sw, "metadata", () -> renderer.render(sw, state.getMetadataPrefix(), item));
233                     });
234                 }
235                 if (state.shouldResume(Math.toIntExact(count))) {
236                     tag(sw, "resumptionToken", state.nextState(), rtAttrs);
237                 } else {
238                     for (OaiPmhDeleted item : deleted) {
239                         tag(sw, "record", () ->
240                                 writeDeletedRecord(sw, item.getId(), formatDate(item.getDatestamp()), item.getSets()));
241                     }
242                     if (state.hasResumed()) {
243                         tag(sw, "resumptionToken", null, rtAttrs);
244                     }
245                 }
246             });
247         });
248     }
249 
250     private void writeRecordHeader(XMLStreamWriter sw, String id, DocumentaryUnit item) {
251         tag(sw, "identifier", id);
252         SystemEvent event = item.getLatestEvent();
253         tag(sw, "datestamp", event != null
254                 ? formatDate(event.getTimestamp())
255                 : formatDate(ZonedDateTime.now()));
256         tag(sw, "setSpec", item.getRepository().getCountry().getCode());
257         tag(sw, "setSpec", item.getRepository().getCountry().getCode() + ":"
258                 + item.getRepository().getId());
259     }
260 
261     private void writeDeletedRecord(XMLStreamWriter sw, String id, String timestamp, List<String> sets) {
262         tag(sw, "header", attrs("status", "deleted"), () -> {
263             tag(sw, "identifier", id);
264             tag(sw, "datestamp", timestamp);
265             for (String setSpec : sets) {
266                 tag(sw, "setSpec", setSpec);
267             }
268         });
269     }
270 
271     private void withDoc(XMLStreamWriter sw, Runnable runnable) {
272         doc(sw, () -> root(sw, "OAI-PMH", DEFAULT_NAMESPACE, attrs(), NAMESPACES, () -> {
273             attribute(sw, "http://www.w3.org/2001/XMLSchema-instance",
274                     "schemaLocation", DEFAULT_NAMESPACE + " http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd " +
275                             Joiner.on(' ').join(ImmutableList.of(
276                                     MetadataPrefix.oai_dc.namespace(), MetadataPrefix.oai_dc.schema(),
277                                     MetadataPrefix.ead.namespace(), MetadataPrefix.ead.schema()
278                             ))
279             );
280             runnable.run();
281         }));
282     }
283 
284     private void dcDescription(XMLStreamWriter sw, String description) {
285         root(sw, "oai_dc:dc", null, attrs(), DC_NAMESPACES, () -> {
286             attribute(sw, "http://www.w3.org/2001/XMLSchema-instance",
287                     "schemaLocation", MetadataPrefix.oai_dc.namespace()
288                             + " " + MetadataPrefix.oai_dc.schema());
289             tag(sw, "dc:description", description);
290         });
291     }
292 
293     private void preamble(XMLStreamWriter sw, String verb, Map<String, String> attrs) {
294         String time = formatDate(ZonedDateTime.now());
295         HashMap<String, String> attrMap = Maps.newHashMap();
296         attrMap.putAll(attrs);
297         if (verb != null) {
298             attrMap.put("verb", verb);
299         }
300         tag(sw, "responseDate", time);
301         tag(sw, "request", config.getString("oaipmh.baseURL"), attrMap);
302     }
303 
304     private void error(XMLStreamWriter sw, ErrorCode code, String msg, OaiPmhState state) {
305         Map<String, String> attrs = attrs("metadataPrefix", state.getMetadataPrefix());
306         if (state.getIdentifier() != null) {
307             attrs.put("identifier", state.getIdentifier());
308         }
309         withDoc(sw, () -> {
310             preamble(sw, state.getVerb().name(), attrs);
311             tag(sw, "error", msg, attrs("code", code.name()));
312         });
313     }
314 
315     public void error(XMLStreamWriter sw, ErrorCode code, String msg, Verb verb) {
316         withDoc(sw, () -> {
317             preamble(sw, verb != null ? verb.name() : null, attrs());
318             tag(sw, "error", msg, attrs("code", code.name()));
319         });
320     }
321 
322     private Map<String, String> getResumptionAttrs(QueryApi.Page<?> page) {
323         return attrs("completeListSize", page.getTotal(), "cursor", page.getOffset());
324     }
325 
326     private static String formatDate(String timestamp) {
327         return formatDate(ZonedDateTime.parse(timestamp));
328     }
329 
330     private static String formatDate(ZonedDateTime time) {
331         return time.format(DATE_PATTERN);
332     }
333 }