1 package eu.ehri.project.importers.util;
2
3 import com.google.common.collect.Lists;
4 import com.google.common.collect.Maps;
5 import eu.ehri.project.definitions.Entities;
6 import eu.ehri.project.definitions.Ontology;
7 import eu.ehri.project.importers.properties.XmlImportProperties;
8 import org.joda.time.DateTime;
9 import org.joda.time.format.DateTimeFormatter;
10 import org.joda.time.format.ISODateTimeFormat;
11
12 import java.text.ParsePosition;
13 import java.text.SimpleDateFormat;
14 import java.util.List;
15 import java.util.Locale;
16 import java.util.Map;
17 import java.util.Optional;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20
21 import static eu.ehri.project.importers.util.ImportHelpers.getSubNode;
22
23 class DateParser {
24
25
26 private static final Pattern[] datePatterns = {
27
28
29 Pattern.compile("^(\\d{4}-\\d{1,2}-\\d{1,2})\\s?-\\s?(\\d{4}-\\d{1,2}-\\d{1,2})$"),
30 Pattern.compile("^(\\d{4}-\\d{1,2}-\\d{1,2})$"),
31 Pattern.compile("^(\\d{4})\\s?-\\s?(\\d{4})$"),
32 Pattern.compile("^(\\d{4})-\\[(\\d{4})\\]$"),
33 Pattern.compile("^(\\d{4})-\\[(\\d{4})\\]$"),
34 Pattern.compile("^(\\d{4}s)-\\[(\\d{4}s)\\]$"),
35 Pattern.compile("^\\[(\\d{4})\\]$"),
36 Pattern.compile("^(\\d{4})$"),
37 Pattern.compile("^(\\d{2})th century$"),
38 Pattern.compile("^\\s*(\\d{4})\\s*-\\s*(\\d{4})"),
39
40 Pattern.compile("^\\s*(\\d{4})/(\\d{2})"),
41 Pattern.compile("^\\s*(\\d{4})\\s*/\\s*(\\d{4})"),
42 Pattern.compile("^(\\d{4}-\\d{1,2})/(\\d{4}-\\d{1,2})"),
43 Pattern.compile("^(\\d{4}-\\d{1,2}-\\d{1,2})/(\\d{4}-\\d{1,2}-\\d{1,2})"),
44 Pattern.compile("^(\\d{4})/(\\d{4}-\\d{1,2}-\\d{1,2})")
45 };
46
47
48
49
50
51 private static final DateTimeFormatter isoDateTimeFormat = ISODateTimeFormat.date()
52 .withLocale(Locale.ENGLISH);
53
54
55 private static final SimpleDateFormat yearMonthDateFormat = new SimpleDateFormat("yyyy-MM");
56 private static final SimpleDateFormat yearDateFormat = new SimpleDateFormat("yyyy");
57 private static final XmlImportProperties dates = new XmlImportProperties("dates.properties");
58
59
60 static List<Map<String, Object>> extractDates(Map<String, Object> data) {
61 List<Map<String, Object>> extractedDates = Lists.newArrayList();
62
63 for (String key : data.keySet()) {
64 if (key.equals(Entities.DATE_PERIOD) && data.get(key) instanceof List) {
65 for (Map<String, Object> event : (List<Map<String, Object>>) data.get(key)) {
66 extractedDates.add(getSubNode(event));
67 }
68 }
69 }
70
71 Map<String, String> dateValues = returnDatesAsString(data);
72 for (String s : dateValues.keySet()) {
73 extractDate(s).ifPresent(extractedDates::add);
74 }
75 replaceDates(data, extractedDates);
76 return extractedDates;
77 }
78
79 private static void replaceDates(Map<String, Object> data, List<Map<String, Object>> extractedDates) {
80 Map<String, String> dateValues = returnDatesAsString(data);
81 Map<String, String> dateTypes = Maps.newHashMap();
82 for (String dateValue : dateValues.keySet()) {
83 dateTypes.put(dateValues.get(dateValue), null);
84 }
85 for (Map<String, Object> dateMap : extractedDates) {
86 dateValues.remove(dateMap.get(Ontology.DATE_HAS_DESCRIPTION));
87 }
88
89 for (String datevalue : dateValues.keySet()) {
90 String dateType = dateValues.get(datevalue);
91 if (dateTypes.containsKey(dateType) && dateTypes.get(dateType) != null) {
92 dateTypes.put(dateType, dateTypes.get(dateType) + ", " + datevalue.trim());
93 } else {
94 dateTypes.put(dateType, datevalue.trim());
95 }
96 }
97 for (String dateType : dateTypes.keySet()) {
98 if (dateTypes.get(dateType) == null) {
99 data.remove(dateType);
100 } else {
101 data.put(dateType, dateTypes.get(dateType));
102 }
103 }
104 }
105
106 private static Optional<Map<String, Object>> extractDate(String date) {
107 Map<String, Object> data = matchDate(date);
108 return data.isEmpty() ? Optional.empty() : Optional.of(data);
109 }
110
111 private static Map<String, Object> matchDate(String date) {
112 Map<String, Object> data = Maps.newHashMap();
113 for (Pattern re : datePatterns) {
114 Matcher matcher = re.matcher(date);
115 if (matcher.matches()) {
116 data.put(Ontology.DATE_PERIOD_START_DATE, normaliseDate(matcher.group(1)));
117 data.put(Ontology.DATE_PERIOD_END_DATE, normaliseDate(matcher.group(matcher
118 .groupCount() > 1 ? 2 : 1), true));
119 data.put(Ontology.DATE_HAS_DESCRIPTION, date);
120 break;
121 }
122 }
123 return data;
124 }
125
126 private static Map<String, String> returnDatesAsString(Map<String, Object> data) {
127 Map<String, String> datesAsString = Maps.newHashMap();
128 Object value;
129 for (Map.Entry<String, Object> property : data.entrySet()) {
130 if (dates.containsProperty(property.getKey()) && (value = property.getValue()) != null) {
131 if (property.getValue() instanceof String) {
132 String dateValue = (String) value;
133 for (String d : dateValue.split(",")) {
134 datesAsString.put(d, property.getKey());
135 }
136 } else if (property.getValue() instanceof List) {
137 for (String s : (List<String>) value) {
138 datesAsString.put(s, property.getKey());
139 }
140 }
141 }
142 }
143 return datesAsString;
144 }
145
146 static String normaliseDate(String date) {
147 return normaliseDate(date, false);
148 }
149
150
151
152
153
154
155
156
157
158 static String normaliseDate(String date, boolean endOfPeriod) {
159 String returnDate = isoDateTimeFormat.print(DateTime.parse(date));
160 if (returnDate.startsWith("00")) {
161 returnDate = "19" + returnDate.substring(2);
162 date = "19" + date;
163 }
164 if (endOfPeriod) {
165 if (!date.equals(returnDate)) {
166 ParsePosition p = new ParsePosition(0);
167 yearMonthDateFormat.parse(date, p);
168 if (p.getIndex() > 0) {
169 returnDate = isoDateTimeFormat.print(DateTime.parse(date).plusMonths(1).minusDays(1));
170 } else {
171 p = new ParsePosition(0);
172 yearDateFormat.parse(date, p);
173 if (p.getIndex() > 0) {
174 returnDate = isoDateTimeFormat.print(DateTime.parse(date).plusYears(1).minusDays(1));
175 }
176 }
177 }
178 }
179 return returnDate;
180 }
181 }