1 package eu.ehri.project.utils;
2
3 import com.google.common.base.Splitter;
4 import com.google.common.collect.ImmutableBiMap;
5 import com.google.common.collect.ImmutableMap;
6 import com.google.common.collect.Lists;
7 import com.google.common.collect.Maps;
8 import eu.ehri.project.models.base.Described;
9 import eu.ehri.project.models.base.Description;
10 import eu.ehri.project.models.base.Entity;
11
12 import java.util.Collections;
13 import java.util.Comparator;
14 import java.util.List;
15 import java.util.Locale;
16 import java.util.Map;
17 import java.util.Optional;
18
19
20
21
22
23 public class LanguageHelpers {
24
/** Two-letter language code (as listed by {@link Locale#getISOLanguages()}) to its locale. */
private static final Map<String, Locale> locale2Map;
/** Three-letter code (as reported by {@link Locale#getISO3Language()}) to the two-letter code. */
private static final Map<String, String> locale3Map;
/** Lower-cased English display name of a language to its locale. */
private static final Map<String, Locale> localeNameMap;

static {
    // Build all three lookup tables in one pass over the JDK's language list.
    String[] isoCodes = Locale.getISOLanguages();
    int expectedSize = isoCodes.length;
    localeNameMap = Maps.newHashMapWithExpectedSize(expectedSize);
    locale3Map = Maps.newHashMapWithExpectedSize(expectedSize);
    locale2Map = Maps.newHashMapWithExpectedSize(expectedSize);
    for (String twoLetter : isoCodes) {
        Locale forCode = new Locale(twoLetter);
        String threeLetter = forCode.getISO3Language();
        String englishName = forCode.getDisplayLanguage(Locale.ENGLISH);
        locale2Map.put(twoLetter, forCode);
        locale3Map.put(threeLetter, twoLetter);
        // NOTE(review): toLowerCase() uses the JVM default locale; the lookups in
        // this class lower-case their input the same way. Switching to an explicit
        // locale would have to be done here and in those lookups together.
        localeNameMap.put(englishName.toLowerCase(), forCode);
    }
}
41
42
43 private static final Splitter codeSplitter = Splitter.on("-").omitEmptyStrings().limit(2);
44
45
46
47
48
49 private static final ImmutableBiMap<String, String> iso639BibTermLookup = ImmutableBiMap.<String, String>builder()
50 .put("alb", "sqi")
51 .put("arm", "hye")
52 .put("baq", "eus")
53 .put("ger", "deu")
54 .put("dut", "nld")
55 .put("rum", "ron")
56 .put("mac", "mkd")
57 .put("slo", "slk")
58 .put("fre", "fra")
59 .put("cze", "ces")
60 .build();
61
62
63
64
65 private static final ImmutableBiMap<String, String> continentCodes = ImmutableBiMap.<String, String>builder()
66 .put("AF", "Africa")
67 .put("AN", "Antarctica")
68 .put("AS", "Asia")
69 .put("EU", "Europe")
70 .put("NA", "North America")
71 .put("OC", "Australia")
72 .put("SA", "South America")
73 .build();
74
75 private static final ImmutableMap<String, String> countryCodesToContinents = ImmutableMap.<String, String>builder()
76 .put("AD", "EU")
77 .put("AE", "AS")
78 .put("AF", "AS")
79 .put("AG", "NA")
80 .put("AI", "NA")
81 .put("AL", "EU")
82 .put("AM", "AS")
83 .put("AN", "NA")
84 .put("AO", "AF")
85 .put("AP", "AS")
86 .put("AQ", "AN")
87 .put("AR", "SA")
88 .put("AS", "OC")
89 .put("AT", "EU")
90 .put("AU", "OC")
91 .put("AW", "NA")
92 .put("AX", "EU")
93 .put("AZ", "AS")
94 .put("BA", "EU")
95 .put("BB", "NA")
96 .put("BD", "AS")
97 .put("BE", "EU")
98 .put("BF", "AF")
99 .put("BG", "EU")
100 .put("BH", "AS")
101 .put("BI", "AF")
102 .put("BJ", "AF")
103 .put("BL", "NA")
104 .put("BM", "NA")
105 .put("BN", "AS")
106 .put("BO", "SA")
107 .put("BR", "SA")
108 .put("BS", "NA")
109 .put("BT", "AS")
110 .put("BV", "AN")
111 .put("BW", "AF")
112 .put("BY", "EU")
113 .put("BZ", "NA")
114 .put("CA", "NA")
115 .put("CC", "AS")
116 .put("CD", "AF")
117 .put("CF", "AF")
118 .put("CG", "AF")
119 .put("CH", "EU")
120 .put("CI", "AF")
121 .put("CK", "OC")
122 .put("CL", "SA")
123 .put("CM", "AF")
124 .put("CN", "AS")
125 .put("CO", "SA")
126 .put("CR", "NA")
127 .put("CU", "NA")
128 .put("CV", "AF")
129 .put("CX", "AS")
130 .put("CY", "AS")
131 .put("CZ", "EU")
132 .put("DE", "EU")
133 .put("DJ", "AF")
134 .put("DK", "EU")
135 .put("DM", "NA")
136 .put("DO", "NA")
137 .put("DZ", "AF")
138 .put("EC", "SA")
139 .put("EE", "EU")
140 .put("EG", "AF")
141 .put("EH", "AF")
142 .put("ER", "AF")
143 .put("ES", "EU")
144 .put("ET", "AF")
145 .put("EU", "EU")
146 .put("FI", "EU")
147 .put("FJ", "OC")
148 .put("FK", "SA")
149 .put("FM", "OC")
150 .put("FO", "EU")
151 .put("FR", "EU")
152 .put("FX", "EU")
153 .put("GA", "AF")
154 .put("GB", "EU")
155 .put("GD", "NA")
156 .put("GE", "AS")
157 .put("GF", "SA")
158 .put("GG", "EU")
159 .put("GH", "AF")
160 .put("GI", "EU")
161 .put("GL", "NA")
162 .put("GM", "AF")
163 .put("GN", "AF")
164 .put("GP", "NA")
165 .put("GQ", "AF")
166 .put("GR", "EU")
167 .put("GS", "AN")
168 .put("GT", "NA")
169 .put("GU", "OC")
170 .put("GW", "AF")
171 .put("GY", "SA")
172 .put("HK", "AS")
173 .put("HM", "AN")
174 .put("HN", "NA")
175 .put("HR", "EU")
176 .put("HT", "NA")
177 .put("HU", "EU")
178 .put("ID", "AS")
179 .put("IE", "EU")
180 .put("IL", "AS")
181 .put("IM", "EU")
182 .put("IN", "AS")
183 .put("IO", "AS")
184 .put("IQ", "AS")
185 .put("IR", "AS")
186 .put("IS", "EU")
187 .put("IT", "EU")
188 .put("JE", "EU")
189 .put("JM", "NA")
190 .put("JO", "AS")
191 .put("JP", "AS")
192 .put("KE", "AF")
193 .put("KG", "AS")
194 .put("KH", "AS")
195 .put("KI", "OC")
196 .put("KM", "AF")
197 .put("KN", "NA")
198 .put("KP", "AS")
199 .put("KR", "AS")
200 .put("KW", "AS")
201 .put("KY", "NA")
202 .put("KZ", "AS")
203 .put("LA", "AS")
204 .put("LB", "AS")
205 .put("LC", "NA")
206 .put("LI", "EU")
207 .put("LK", "AS")
208 .put("LR", "AF")
209 .put("LS", "AF")
210 .put("LT", "EU")
211 .put("LU", "EU")
212 .put("LV", "EU")
213 .put("LY", "AF")
214 .put("MA", "AF")
215 .put("MC", "EU")
216 .put("MD", "EU")
217 .put("ME", "EU")
218 .put("MF", "NA")
219 .put("MG", "AF")
220 .put("MH", "OC")
221 .put("MK", "EU")
222 .put("ML", "AF")
223 .put("MM", "AS")
224 .put("MN", "AS")
225 .put("MO", "AS")
226 .put("MP", "OC")
227 .put("MQ", "NA")
228 .put("MR", "AF")
229 .put("MS", "NA")
230 .put("MT", "EU")
231 .put("MU", "AF")
232 .put("MV", "AS")
233 .put("MW", "AF")
234 .put("MX", "NA")
235 .put("MY", "AS")
236 .put("MZ", "AF")
237 .put("NA", "AF")
238 .put("NC", "OC")
239 .put("NE", "AF")
240 .put("NF", "OC")
241 .put("NG", "AF")
242 .put("NI", "NA")
243 .put("NL", "EU")
244 .put("NO", "EU")
245 .put("NP", "AS")
246 .put("NR", "OC")
247 .put("NU", "OC")
248 .put("NZ", "OC")
249 .put("O1", "--")
250 .put("OM", "AS")
251 .put("PA", "NA")
252 .put("PE", "SA")
253 .put("PF", "OC")
254 .put("PG", "OC")
255 .put("PH", "AS")
256 .put("PK", "AS")
257 .put("PL", "EU")
258 .put("PM", "NA")
259 .put("PN", "OC")
260 .put("PR", "NA")
261 .put("PS", "AS")
262 .put("PT", "EU")
263 .put("PW", "OC")
264 .put("PY", "SA")
265 .put("QA", "AS")
266 .put("RE", "AF")
267 .put("RO", "EU")
268 .put("RS", "EU")
269 .put("RU", "EU")
270 .put("RW", "AF")
271 .put("SA", "AS")
272 .put("SB", "OC")
273 .put("SC", "AF")
274 .put("SD", "AF")
275 .put("SE", "EU")
276 .put("SG", "AS")
277 .put("SH", "AF")
278 .put("SI", "EU")
279 .put("SJ", "EU")
280 .put("SK", "EU")
281 .put("SL", "AF")
282 .put("SM", "EU")
283 .put("SN", "AF")
284 .put("SO", "AF")
285 .put("SR", "SA")
286 .put("ST", "AF")
287 .put("SV", "NA")
288 .put("SY", "AS")
289 .put("SZ", "AF")
290 .put("TC", "NA")
291 .put("TD", "AF")
292 .put("TF", "AN")
293 .put("TG", "AF")
294 .put("TH", "AS")
295 .put("TJ", "AS")
296 .put("TK", "OC")
297 .put("TL", "AS")
298 .put("TM", "AS")
299 .put("TN", "AF")
300 .put("TO", "OC")
301 .put("TR", "EU")
302 .put("TT", "NA")
303 .put("TV", "OC")
304 .put("TW", "AS")
305 .put("TZ", "AF")
306 .put("UA", "EU")
307 .put("UG", "AF")
308 .put("UM", "OC")
309 .put("US", "NA")
310 .put("UY", "SA")
311 .put("UZ", "AS")
312 .put("VA", "EU")
313 .put("VC", "NA")
314 .put("VE", "SA")
315 .put("VG", "NA")
316 .put("VI", "NA")
317 .put("VN", "AS")
318 .put("VU", "OC")
319 .put("WF", "OC")
320 .put("WS", "OC")
321 .put("YE", "AS")
322 .put("YT", "AF")
323 .put("ZA", "AF")
324 .put("ZM", "AF")
325 .put("ZW", "AF")
326 .build();
327
328 public static Optional<String> countryCodeToContinent(String countryCode) {
329 String continentCode = countryCodesToContinents.get(countryCode.toUpperCase());
330 if (continentCode != null) {
331 return Optional.ofNullable(continentCodes.get(continentCode));
332 }
333 return Optional.empty();
334 }
335
336
337
338
339
340
341
342
343
344
345 public static Optional<Description> getBestDescription(Described item, Optional<Description> priorDescOpt, String langCode) {
346 List<Description> descriptions = Lists.newArrayList(item.getDescriptions());
347 descriptions.sort(Comparator.comparing(Entity::getId));
348 Description fallBack = null;
349 for (Description description : descriptions) {
350 if (fallBack == null) {
351 fallBack = description;
352 }
353
354
355
356 for (Description parent : priorDescOpt.map(Collections::singleton).orElse(Collections.emptySet())) {
357 for (String code : Optional.ofNullable(parent.getDescriptionCode())
358 .map(Collections::singleton).orElse(Collections.emptySet())) {
359 if (code.equals(description.getDescriptionCode())) {
360 return Optional.of(description);
361 }
362 }
363 }
364
365
366 if (description.getLanguageOfDescription().equalsIgnoreCase(langCode)) {
367 return Optional.of(description);
368 }
369 }
370 return Optional.ofNullable(fallBack);
371 }
372
373 public static Optional<Description> getBestDescription(Described item, String langCode) {
374 return getBestDescription(item, Optional.empty(), langCode);
375 }
376
377
378
379
380
381
382
383
384 public static String iso639DashTwoCode(String nameOrCode) {
385 if (nameOrCode.length() == 2 && locale2Map.containsKey(nameOrCode)) {
386 return locale2Map.get(nameOrCode).getISO3Language();
387 } else if (nameOrCode.length() == 3 && iso639BibTermLookup.containsKey(nameOrCode)) {
388 return iso639BibTermLookup.get(nameOrCode);
389 } else if (nameOrCode.length() > 3 && localeNameMap.containsKey(nameOrCode.toLowerCase())) {
390 return localeNameMap.get(nameOrCode.toLowerCase()).getISO3Language();
391
392
393
394
395 }
396 return nameOrCode;
397 }
398
399
400
401
402
403
404
405
406 public static String iso639DashOneCode(String nameOrCode) {
407 if (nameOrCode.length() == 3 && locale3Map.containsKey(nameOrCode)) {
408 return locale3Map.get(nameOrCode);
409 } else if (nameOrCode.length() == 3 && iso639BibTermLookup.containsKey(nameOrCode)) {
410 return locale3Map.get(iso639BibTermLookup.get(nameOrCode));
411 } else if (nameOrCode.length() > 3 && localeNameMap.containsKey(nameOrCode.toLowerCase())) {
412 return localeNameMap.get(nameOrCode.toLowerCase()).getLanguage();
413 } else if (nameOrCode.length() > 2 && nameOrCode.contains("-")) {
414
415 List<String> parts = Lists.newArrayList(codeSplitter.split(nameOrCode));
416 if (parts.size() == 1) {
417 return iso639DashOneCode(parts.get(0));
418 } else if (parts.size() == 2) {
419 return iso639DashOneCode(parts.get(0)) + "-" + parts.get(1);
420 }
421 }
422 return nameOrCode;
423 }
424
425
426
427
428
429
430
431
432
433 public static String codeToName(String code) {
434 if (code.length() == 2 && locale2Map.containsKey(code)) {
435 return locale2Map.get(code).getDisplayLanguage(Locale.ENGLISH);
436 } else if (code.length() == 3) {
437 String termCode = iso639BibTermLookup.containsKey(code)
438 ? iso639BibTermLookup.get(code)
439 : code;
440 String twoCode = locale3Map.get(termCode);
441 if (locale2Map.containsKey(twoCode)) {
442 return locale2Map.get(twoCode).getDisplayLanguage(Locale.ENGLISH);
443 }
444 }
445 return code;
446 }
447
448
449
450
451
452
453
454 public static String iso3166dashOneCodeToName(String code) {
455 return new Locale(Locale.ENGLISH.getLanguage(), code)
456 .getDisplayCountry(Locale.ENGLISH);
457 }
458
459
460
461
462
463
464
465 public static String countryCodeToName(String code) {
466 return new java.util.Locale(Locale.ENGLISH.getLanguage(), code)
467 .getDisplayCountry();
468 }
469 }