Skip to content

Commit 60d667c

Browse files
CLDR-18932 Update ZoneParser update process (#5034)
1 parent f9772ad commit 60d667c

File tree

18 files changed

+18720
-19193
lines changed

18 files changed

+18720
-19193
lines changed

docs/site/development/updating-codes/update-time-zone-data-for-zoneparser.md

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,13 @@ title: Update Time Zone Data for ZoneParser
66

77
Note: This is usually done as a part of full time zone data update process.
88

9-
1. Download the latest version of the IANA Time Zone Database from the IANA page: https://www.iana.org/time\-zones
10-
- There are 3 links available for latest version. Select the complete distribution tzdb\-\<version\>.tar.lz (e.g. tzdb\-2021a.tar.lz).
11-
- Extract entire contents to a work directory.
12-
- **Note**: The data\-only distribution contains the minimum set of files you really need. However, you cannot use the convenient make target without the code. The complete distribution package contains the code.
13-
2. Run make target \- rearguard\_tarballs\_version
14-
- This target creates "rearguard" version of zoneinfo files under directory: tzdataunknown\-rearguard.dir.
15-
- **Note**: If you specify a version (e.g. VERSION\=2021a) when invoking the target, "unknown" will be replaced with the specified version (e.g. tzdata2021a\-rearguard.dir), but it's not important in this instruction.
16-
- A standard zoneinfo file may use negative daylight saving time offsets. CLDR code currently cannot handle negative daylight saving time offsets. The "rearguard" version is designed for tools without negative daylight saving time support.
17-
3. Copy files generated by previous step to {CLDR\_DIR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data
18-
- Below is the list of files to be included:
19-
- africa
20-
- antarctica
21-
- asia
22-
- australasia
23-
- backward
24-
- etcetera
25-
- europe
26-
- leapseconds
27-
- northamerica
28-
- southamerica
29-
- zone.tab
30-
- **Note**: leapseconds might be removed from the list later.
31-
4. Edit the file {CLDR\_DIR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/tzdb\-version.txt
32-
- This file contains just one line of text specifying the version of the Time Zone Database, e.g. 2021a.
9+
1. Download and unpack the latest version of the TZDB from the [IANA Time Zone Database page](https://www.iana.org/time-zones)
10+
- `mkdir tzdata && wget -qO- https://data.iana.org/time-zones/tzdata-latest.tar.gz | tar -xzf - -C tzdata`
11+
2. Generate the "rearguard" version of the TZDB
12+
- CLDR display names assume positive daylight saving offsets. The "rearguard" version is designed for tools without negative daylight saving time support.
13+
- `make -C tzdata rearguard.zi`
14+
3. Copy `rearguard.zi`, `version` and `zone.tab` to `tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/`
15+
- `cp tzdata/{rearguard.zi,version,zone.tab} tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/`
16+
4. Clean up the working directory
17+
- `rm -r tzdata`
3318
5. **Record the version: See** [**Updating External Metadata**](/development/updating-codes/external-version-metadata)
34-

tools/cldr-code/src/main/java/org/unicode/cldr/util/ZoneParser.java

Lines changed: 82 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -88,17 +88,6 @@ private void make_zone_to_country() {
8888
country_to_zoneSet = CldrUtility.protectCollection(country_to_zoneSet);
8989
}
9090

91-
/**
92-
* private Map bogusZones = null;
93-
*
94-
* <p>private Map getAliasMap() { if (bogusZones == null) { try { bogusZones = new TreeMap();
95-
* BufferedReader in = Utility.getUTF8Data"TimeZoneAliases.txt"); while (true) { String line =
96-
* in.readLine(); if (line == null) break; line = line.trim(); int pos = line.indexOf('#'); if
97-
* (pos >= 0) { skippedAliases.add(line); line = line.substring(0,pos).trim(); } if
98-
* (line.length() == 0) continue; List pieces = Utility.splitList(line,';', true);
99-
* bogusZones.put(pieces.get(0), pieces.get(1)); } in.close(); } catch (IOException e) { throw
100-
* new IllegalArgumentException("Can't find timezone aliases"); } } return bogusZones; }
101-
*/
10291
Map<String, List<String>> zoneData;
10392

10493
Set<String> skippedAliases = new TreeSet<>();
@@ -567,18 +556,6 @@ private List<String> getData(String s) {
567556
regionalCompare.add("Etc");
568557
}
569558

570-
private static String[] TZFiles = {
571-
"africa",
572-
"antarctica",
573-
"asia",
574-
"australasia",
575-
"backward",
576-
"etcetera",
577-
"europe",
578-
"northamerica",
579-
"southamerica"
580-
};
581-
582559
private static Map<String, String> FIX_UNSTABLE_TZIDS;
583560

584561
private static Set<String> SKIP_LINKS =
@@ -664,7 +641,7 @@ private List<String> getData(String s) {
664641
private void makeZoneData() {
665642
try {
666643
// get version
667-
BufferedReader versionIn = CldrUtility.getUTF8Data("tzdb-version.txt");
644+
BufferedReader versionIn = CldrUtility.getUTF8Data("tzdb/version");
668645
version = versionIn.readLine();
669646
if (!version.matches("[0-9]{4}[a-z]")) {
670647
throw new IllegalArgumentException(
@@ -677,7 +654,7 @@ private void makeZoneData() {
677654
String deg = "([+-])([0-9][0-9][0-9]?)([0-9][0-9])([0-9][0-9])?"; //
678655
Matcher m = PatternCache.get(deg + deg).matcher("");
679656
zoneData = new TreeMap<>();
680-
BufferedReader in = CldrUtility.getUTF8Data("zone.tab");
657+
BufferedReader in = CldrUtility.getUTF8Data("tzdb/zone.tab");
681658
while (true) {
682659
String line = in.readLine();
683660
if (line == null) break;
@@ -749,95 +726,92 @@ private void makeZoneData() {
749726
// now get links
750727
Pattern whitespace = PatternCache.get("\\s+");
751728
XEquivalenceClass<String, String> linkedItems = new XEquivalenceClass<>("None");
752-
for (int i = 0; i < TZFiles.length; ++i) {
753-
in = CldrUtility.getUTF8Data(TZFiles[i]);
754-
String zoneID = null;
755-
while (true) {
756-
String line = in.readLine();
757-
if (line == null) break;
758-
String originalLine = line;
759-
int commentPos = line.indexOf("#");
760-
String comment = null;
761-
if (commentPos >= 0) {
762-
comment = line.substring(commentPos + 1).trim();
763-
line = line.substring(0, commentPos);
729+
in = CldrUtility.getUTF8Data("tzdb/rearguard.zi");
730+
String zoneID = null;
731+
while (true) {
732+
String line = in.readLine();
733+
if (line == null) break;
734+
String originalLine = line;
735+
int commentPos = line.indexOf("#");
736+
String comment = null;
737+
if (commentPos >= 0) {
738+
comment = line.substring(commentPos + 1).trim();
739+
line = line.substring(0, commentPos);
740+
}
741+
line = line.trim();
742+
if (line.length() == 0) continue;
743+
String[] items = whitespace.split(line);
744+
if (zoneID != null || items[0].equals("Zone")) {
745+
List<String> l = new ArrayList<>();
746+
l.addAll(Arrays.asList(items));
747+
748+
// Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01
749+
// 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
750+
if (zoneID == null) {
751+
l.remove(0); // "Zone"
752+
zoneID = l.get(0);
753+
String ntzid = FIX_UNSTABLE_TZIDS.get(zoneID);
754+
if (ntzid != null) zoneID = ntzid;
755+
l.remove(0);
756+
}
757+
List<ZoneLine> zoneRules = zone_rules.get(zoneID);
758+
if (zoneRules == null) {
759+
zoneRules = new ArrayList<>();
760+
zone_rules.put(zoneID, zoneRules);
764761
}
765-
line = line.trim();
766-
if (line.length() == 0) continue;
767-
String[] items = whitespace.split(line);
768-
if (zoneID != null || items[0].equals("Zone")) {
769-
List<String> l = new ArrayList<>();
770-
l.addAll(Arrays.asList(items));
771-
772-
// Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01
773-
// 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
774-
if (zoneID == null) {
775-
l.remove(0); // "Zone"
776-
zoneID = l.get(0);
777-
String ntzid = FIX_UNSTABLE_TZIDS.get(zoneID);
778-
if (ntzid != null) zoneID = ntzid;
779-
l.remove(0);
780-
}
781-
List<ZoneLine> zoneRules = zone_rules.get(zoneID);
782-
if (zoneRules == null) {
783-
zoneRules = new ArrayList<>();
784-
zone_rules.put(zoneID, zoneRules);
785-
}
786762

787-
if (l.size() < ZoneLine.FIELD_COUNT
788-
|| l.size() > ZoneLine.FIELD_COUNT_UNTIL) {
789-
System.out.println("***Zone incorrect field count:");
790-
System.out.println(l);
791-
System.out.println(originalLine);
792-
}
763+
if (l.size() < ZoneLine.FIELD_COUNT || l.size() > ZoneLine.FIELD_COUNT_UNTIL) {
764+
System.out.println("***Zone incorrect field count:");
765+
System.out.println(l);
766+
System.out.println(originalLine);
767+
}
793768

794-
ZoneLine zoneLine = new ZoneLine(l);
795-
zoneLine.comment = comment;
796-
zoneRules.add(zoneLine);
797-
if (l.size() == ZoneLine.FIELD_COUNT) {
798-
zoneID = null; // no continuation line
799-
}
800-
} else if (items[0].equals("Rule")) {
801-
// # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
802-
// Rule Algeria 1916 only - Jun 14 23:00s 1:00 S
803-
804-
String ruleID = items[1];
805-
List<RuleLine> ruleList = ruleID_rules.get(ruleID);
806-
if (ruleList == null) {
807-
ruleList = new ArrayList<>();
808-
ruleID_rules.put(ruleID, ruleList);
809-
}
810-
List<String> l = new ArrayList<>();
811-
l.addAll(Arrays.asList(items));
812-
l.remove(0);
813-
l.remove(0);
814-
if (l.size() != RuleLine.FIELD_COUNT) {
815-
System.out.println("***Rule incorrect field count:");
816-
System.out.println(l);
817-
}
818-
if (comment != null) l.add(comment);
819-
RuleLine ruleLine = new RuleLine(l);
820-
ruleList.add(ruleLine);
821-
822-
} else if (items[0].equals("Link")) {
823-
String old = items[2];
824-
String newOne = items[1];
825-
if (!(SKIP_LINKS.contains(old) && SKIP_LINKS.contains(newOne))) {
826-
// System.out.println("Original " + old + "\t=>\t" + newOne);
827-
linkedItems.add(old, newOne);
828-
}
829-
/*
830-
* String conflict = (String) linkold_new.get(old); if (conflict !=
831-
* null) { System.out.println("Conflict with old: " + old + " => " +
832-
* conflict + ", " + newOne); } System.out.println(old + "\t=>\t" +
833-
* newOne); linkold_new.put(old, newOne);
834-
*/
835-
} else {
836-
if (DEBUG) System.out.println("Unknown zone line: " + line);
769+
ZoneLine zoneLine = new ZoneLine(l);
770+
zoneLine.comment = comment;
771+
zoneRules.add(zoneLine);
772+
if (l.size() == ZoneLine.FIELD_COUNT) {
773+
zoneID = null; // no continuation line
774+
}
775+
} else if (items[0].equals("Rule")) {
776+
// # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
777+
// Rule Algeria 1916 only - Jun 14 23:00s 1:00 S
778+
779+
String ruleID = items[1];
780+
List<RuleLine> ruleList = ruleID_rules.get(ruleID);
781+
if (ruleList == null) {
782+
ruleList = new ArrayList<>();
783+
ruleID_rules.put(ruleID, ruleList);
837784
}
785+
List<String> l = new ArrayList<>();
786+
l.addAll(Arrays.asList(items));
787+
l.remove(0);
788+
l.remove(0);
789+
if (l.size() != RuleLine.FIELD_COUNT) {
790+
System.out.println("***Rule incorrect field count:");
791+
System.out.println(l);
792+
}
793+
if (comment != null) l.add(comment);
794+
RuleLine ruleLine = new RuleLine(l);
795+
ruleList.add(ruleLine);
796+
797+
} else if (items[0].equals("Link")) {
798+
String old = items[2];
799+
String newOne = items[1];
800+
if (!(SKIP_LINKS.contains(old) && SKIP_LINKS.contains(newOne))) {
801+
// System.out.println("Original " + old + "\t=>\t" + newOne);
802+
linkedItems.add(old, newOne);
803+
}
804+
/*
805+
* String conflict = (String) linkold_new.get(old); if (conflict !=
806+
* null) { System.out.println("Conflict with old: " + old + " => " +
807+
* conflict + ", " + newOne); } System.out.println(old + "\t=>\t" +
808+
* newOne); linkold_new.put(old, newOne);
809+
*/
810+
} else {
811+
if (DEBUG) System.out.println("Unknown zone line: " + line);
838812
}
839-
in.close();
840813
}
814+
in.close();
841815
// add in stuff that should be links
842816
for (int i = 0; i < ADD_ZONE_ALIASES_DATA.length; ++i) {
843817
linkedItems.add(ADD_ZONE_ALIASES_DATA[i][0], ADD_ZONE_ALIASES_DATA[i][1]);

tools/cldr-code/src/main/resources/org/unicode/cldr/util/MainTimeZones.txt

Lines changed: 0 additions & 33 deletions
This file was deleted.

0 commit comments

Comments
 (0)