1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package eu.ehri.project.oaipmh;
21
22 import com.google.common.base.Joiner;
23 import com.google.common.collect.ImmutableList;
24 import com.google.common.collect.Maps;
25 import com.typesafe.config.Config;
26 import eu.ehri.project.api.QueryApi;
27 import eu.ehri.project.exporters.xml.StreamingXmlDsl;
28 import eu.ehri.project.models.DocumentaryUnit;
29 import eu.ehri.project.models.events.SystemEvent;
30 import eu.ehri.project.oaipmh.errors.OaiPmhError;
31 import org.slf4j.Logger;
32 import org.slf4j.LoggerFactory;
33
34 import javax.xml.stream.XMLStreamWriter;
35 import java.time.LocalDateTime;
36 import java.time.ZonedDateTime;
37 import java.time.format.DateTimeFormatter;
38 import java.time.temporal.ChronoUnit;
39 import java.util.HashMap;
40 import java.util.List;
41 import java.util.Map;
42
43
44 public class OaiPmhExporter extends StreamingXmlDsl {
45
46 private static final Logger log = LoggerFactory.getLogger(OaiPmhExporter.class);
47
48 private static final String DEFAULT_NAMESPACE = "http://www.openarchives.org/OAI/2.0/";
49
50 static final DateTimeFormatter DATE_PATTERN = DateTimeFormatter
51 .ofPattern("YYYY-MM-dd'T'hh:mm:ss'Z'");
52
53 private static final Map<String, String> NAMESPACES = namespaces(
54 "xsi", "http://www.w3.org/2001/XMLSchema-instance"
55 );
56
57 private static final Map<String, String> DC_NAMESPACES = namespaces(
58 "oai_dc", MetadataPrefix.oai_dc.namespace(),
59 "xsi", "http://www.w3.org/2001/XMLSchema-instance",
60 "dc", "http://purl.org/dc/elements/1.1/");
61
62 private final OaiPmhRenderer renderer;
63 private final Config config;
64 private final OaiPmhData data;
65
66 public OaiPmhExporter(OaiPmhData data, OaiPmhRenderer renderer, Config config) {
67 this.renderer = renderer;
68 this.config = config;
69 this.data = data;
70 }
71
72 public void performVerb(XMLStreamWriter sw, OaiPmhState state) {
73 try {
74 switch (state.getVerb()) {
75 case Identify:
76 identify(sw, state);
77 break;
78 case ListMetadataFormats:
79 listMetadataFormats(sw, state);
80 break;
81 case ListSets:
82 listSets(sw, state);
83 break;
84 case GetRecord:
85 getRecord(sw, state);
86 break;
87 case ListIdentifiers:
88 listIdentifiers(sw, state);
89 break;
90 case ListRecords:
91 listRecords(sw, state);
92 break;
93 }
94 } catch (OaiPmhError e) {
95 error(sw, e.getCode(), e.getMessage(), state);
96 }
97 }
98
99 private void identify(XMLStreamWriter sw, OaiPmhState state) {
100 withDoc(sw, () -> {
101 preamble(sw, Verb.Identify.name(), state.toMap());
102 tag(sw, Verb.Identify.name(), () -> {
103 tag(sw, "repositoryName", config.getString("oaipmh.repositoryName"));
104 tag(sw, "baseURL", config.getString("oaipmh.baseURL"));
105 tag(sw, "protocolVersion", "2.0");
106 tag(sw, "adminEmail", config.getString("oaipmh.adminEmail"));
107 tag(sw, "earliestDatestamp", formatDate(data.getEarliestTimestamp()));
108 tag(sw, "deletedRecord", "persistent");
109 tag(sw, "granularity", "YYYY-MM-DDThh:mm:ssZ");
110 if (config.hasPath("oaipmh.compression")) {
111 tag(sw, "compression", config.getString("oaipmh.compression"));
112 }
113 });
114 });
115 }
116
117 private void listMetadataFormats(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
118 if (state.getIdentifier() != null) {
119 if (data.getRecord(state).isInvalid()) {
120 throw new OaiPmhError(ErrorCode.idDoesNotExist,
121 "Identifier does not exist: " + state.getIdentifier());
122 }
123 }
124 withDoc(sw, () -> {
125 preamble(sw, Verb.ListMetadataFormats.name(), state.toMap());
126 tag(sw, Verb.ListMetadataFormats.name(), () -> {
127 for (MetadataPrefix prefix : MetadataPrefix.values()) {
128 tag(sw, "metadataFormat", () -> {
129 tag(sw, "metadataPrefix", prefix.name());
130 tag(sw, "schema", prefix.schema());
131 tag(sw, "metadataNamespace", prefix.namespace());
132 });
133 }
134 });
135 });
136 }
137
138 private void listSets(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
139 QueryApi.Page<OaiPmhSet> sets = data.getSets(state);
140 long count = sets.getTotal();
141 Map<String, String> rtAttrs = getResumptionAttrs(sets);
142 withDoc(sw, () -> {
143 preamble(sw, Verb.ListSets.name(), state.toMap());
144 tag(sw, Verb.ListSets.name(), () -> {
145 for (OaiPmhSet set: sets) {
146 tag(sw, "set", () -> {
147 tag(sw, "setSpec", set.getId());
148 tag(sw, "setName", set.getName());
149 tag(sw, "setDescription", () -> dcDescription(sw, set.getDescription()));
150 });
151 }
152 if (state.shouldResume(Math.toIntExact(count))) {
153 tag(sw, "resumptionToken", state.nextState(), rtAttrs);
154 } else if (state.hasResumed()) {
155 tag(sw, "resumptionToken", null, rtAttrs);
156 }
157 });
158 });
159 }
160
161 private void getRecord(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
162 OaiPmhRecordResult record = data.getRecord(state);
163 record.doc().ifPresent(item ->
164 withDoc(sw, () -> {
165 preamble(sw, Verb.GetRecord.name(), state.toMap());
166 tag(sw, Verb.GetRecord.name(), () ->
167 tag(sw, "record", () -> {
168 tag(sw, "header", () -> writeRecordHeader(sw, state.getIdentifier(), item));
169 tag(sw, "metadata", () -> renderer.render(sw, state.getMetadataPrefix(), item));
170 }));
171 })
172 );
173
174 record.deleted().ifPresent(deleted ->
175 writeDeletedRecord(sw, deleted.getId(),
176 formatDate(deleted.getDatestamp()), deleted.getSets())
177 );
178
179 if (record.isInvalid()) {
180 throw new OaiPmhError(ErrorCode.idDoesNotExist,
181 "ID does not exist: " + state.getIdentifier());
182 }
183 }
184
185 private void listIdentifiers(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
186 QueryApi.Page<DocumentaryUnit> items = data.getFilteredDocumentaryUnits(state);
187 long count = items.getTotal();
188 Iterable<OaiPmhDeleted> deleted = data.getFilteredDeletedDocumentaryUnits(state);
189 Map<String, String> rtAttrs = getResumptionAttrs(items);
190 if (count == 0 && !deleted.iterator().hasNext()) {
191 throw new OaiPmhError(ErrorCode.noRecordsMatch);
192 }
193
194 withDoc(sw, () -> {
195 preamble(sw, Verb.ListIdentifiers.name(), state.toMap());
196 tag(sw, Verb.ListIdentifiers.name(), () -> {
197 for (DocumentaryUnit item : items) {
198 tag(sw, "header", () -> writeRecordHeader(sw, item.getId(), item));
199 }
200 if (state.shouldResume(Math.toIntExact(count))) {
201 tag(sw, "resumptionToken", state.nextState(), rtAttrs);
202 } else {
203 for (OaiPmhDeleted item : deleted) {
204 writeDeletedRecord(sw, item.getId(), formatDate(item.getDatestamp()), item.getSets());
205 }
206 if (state.hasResumed()) {
207 tag(sw, "resumptionToken", null, rtAttrs);
208 }
209 }
210 });
211 });
212 }
213
214 private void listRecords(XMLStreamWriter sw, OaiPmhState state) throws OaiPmhError {
215 LocalDateTime before = LocalDateTime.now();
216 QueryApi.Page<DocumentaryUnit> items = data.getFilteredDocumentaryUnits(state);
217 long count = items.getTotal();
218 Map<String, String> rtAttrs = getResumptionAttrs(items);
219 Iterable<OaiPmhDeleted> deleted = data.getFilteredDeletedDocumentaryUnits(state);
220 if (count == 0 && !deleted.iterator().hasNext()) {
221 throw new OaiPmhError(ErrorCode.noRecordsMatch);
222 }
223 LocalDateTime after = LocalDateTime.now();
224 log.debug("Fetched {} items in {} millis", count, before.until(after, ChronoUnit.MILLIS));
225
226 withDoc(sw, () -> {
227 preamble(sw, Verb.ListRecords.name(), state.toMap());
228 tag(sw, Verb.ListRecords.name(), () -> {
229 for (DocumentaryUnit item : items) {
230 tag(sw, "record", () -> {
231 tag(sw, "header", () -> writeRecordHeader(sw, item.getId(), item));
232 tag(sw, "metadata", () -> renderer.render(sw, state.getMetadataPrefix(), item));
233 });
234 }
235 if (state.shouldResume(Math.toIntExact(count))) {
236 tag(sw, "resumptionToken", state.nextState(), rtAttrs);
237 } else {
238 for (OaiPmhDeleted item : deleted) {
239 tag(sw, "record", () ->
240 writeDeletedRecord(sw, item.getId(), formatDate(item.getDatestamp()), item.getSets()));
241 }
242 if (state.hasResumed()) {
243 tag(sw, "resumptionToken", null, rtAttrs);
244 }
245 }
246 });
247 });
248 }
249
250 private void writeRecordHeader(XMLStreamWriter sw, String id, DocumentaryUnit item) {
251 tag(sw, "identifier", id);
252 SystemEvent event = item.getLatestEvent();
253 tag(sw, "datestamp", event != null
254 ? formatDate(event.getTimestamp())
255 : formatDate(ZonedDateTime.now()));
256 tag(sw, "setSpec", item.getRepository().getCountry().getCode());
257 tag(sw, "setSpec", item.getRepository().getCountry().getCode() + ":"
258 + item.getRepository().getId());
259 }
260
261 private void writeDeletedRecord(XMLStreamWriter sw, String id, String timestamp, List<String> sets) {
262 tag(sw, "header", attrs("status", "deleted"), () -> {
263 tag(sw, "identifier", id);
264 tag(sw, "datestamp", timestamp);
265 for (String setSpec : sets) {
266 tag(sw, "setSpec", setSpec);
267 }
268 });
269 }
270
271 private void withDoc(XMLStreamWriter sw, Runnable runnable) {
272 doc(sw, () -> root(sw, "OAI-PMH", DEFAULT_NAMESPACE, attrs(), NAMESPACES, () -> {
273 attribute(sw, "http://www.w3.org/2001/XMLSchema-instance",
274 "schemaLocation", DEFAULT_NAMESPACE + " http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd " +
275 Joiner.on(' ').join(ImmutableList.of(
276 MetadataPrefix.oai_dc.namespace(), MetadataPrefix.oai_dc.schema(),
277 MetadataPrefix.ead.namespace(), MetadataPrefix.ead.schema()
278 ))
279 );
280 runnable.run();
281 }));
282 }
283
284 private void dcDescription(XMLStreamWriter sw, String description) {
285 root(sw, "oai_dc:dc", null, attrs(), DC_NAMESPACES, () -> {
286 attribute(sw, "http://www.w3.org/2001/XMLSchema-instance",
287 "schemaLocation", MetadataPrefix.oai_dc.namespace()
288 + " " + MetadataPrefix.oai_dc.schema());
289 tag(sw, "dc:description", description);
290 });
291 }
292
293 private void preamble(XMLStreamWriter sw, String verb, Map<String, String> attrs) {
294 String time = formatDate(ZonedDateTime.now());
295 HashMap<String, String> attrMap = Maps.newHashMap();
296 attrMap.putAll(attrs);
297 if (verb != null) {
298 attrMap.put("verb", verb);
299 }
300 tag(sw, "responseDate", time);
301 tag(sw, "request", config.getString("oaipmh.baseURL"), attrMap);
302 }
303
304 private void error(XMLStreamWriter sw, ErrorCode code, String msg, OaiPmhState state) {
305 Map<String, String> attrs = attrs("metadataPrefix", state.getMetadataPrefix());
306 if (state.getIdentifier() != null) {
307 attrs.put("identifier", state.getIdentifier());
308 }
309 withDoc(sw, () -> {
310 preamble(sw, state.getVerb().name(), attrs);
311 tag(sw, "error", msg, attrs("code", code.name()));
312 });
313 }
314
315 public void error(XMLStreamWriter sw, ErrorCode code, String msg, Verb verb) {
316 withDoc(sw, () -> {
317 preamble(sw, verb != null ? verb.name() : null, attrs());
318 tag(sw, "error", msg, attrs("code", code.name()));
319 });
320 }
321
322 private Map<String, String> getResumptionAttrs(QueryApi.Page<?> page) {
323 return attrs("completeListSize", page.getTotal(), "cursor", page.getOffset());
324 }
325
326 private static String formatDate(String timestamp) {
327 return formatDate(ZonedDateTime.parse(timestamp));
328 }
329
330 private static String formatDate(ZonedDateTime time) {
331 return time.format(DATE_PATTERN);
332 }
333 }