|
| 1 | +<!-- Version 4.0 --> |
| 2 | + |
| 3 | +<!-- datetime.xml --> |
| 4 | +<!-- This file contains the general formulas for parsing date/time formats. --> |
| 5 | + |
| 6 | +<datetime> |
| 7 | + |
| 8 | +<define name="_year" extract="year"> <!-- Year: four digits beginning 20 or 19, or a two-digit year whose first digit is 9, 0, 1 or 2 and which is not followed by another digit. --> |
| 9 | + <text><![CDATA[(20\d\d|19\d\d|[9012]\d(?!\d))]]></text> |
| 10 | +</define> |
| 11 | + |
| 12 | +<define name="_month" extract="month"> <!-- Numeric month 1 to 12 with an optional leading zero; rejected when a colon follows (presumably so the hour of a clock time is not read as a month; confirm). --> |
| 13 | + <text><![CDATA[(0?[1-9]|1[012])(?!:)]]></text> |
| 14 | +</define> |
| 15 | + |
| 16 | +<define name="_litmonth" extract="litmonth"> <!-- Literal month: a three-letter English abbreviation or a Chinese numeral followed by U+6708, not preceded by a digit or word character. The trailing class consumes the rest of the month name and any of , . ; after it. For months 1 and 11 the numeral "one" is accepted both as U+4E00 (the CJK ideograph) and as U+3127 (the look-alike Bopomofo letter previously used here). --> |
| 17 | + <text><![CDATA[(?<![\d\w])(jan|[\x{4E00}\x{3127}]\x{6708}|feb|\x{4E8C}\x{6708}|mar|\x{4E09}\x{6708}|apr|\x{56DB}\x{6708}|may|\x{4E94}\x{6708}|jun|\x{516D}\x{6708}|jul|\x{4E03}\x{6708}|aug|\x{516B}\x{6708}|sep|\x{4E5D}\x{6708}|oct|\x{5341}\x{6708}|nov|\x{5341}[\x{4E00}\x{3127}]\x{6708}|dec|\x{5341}\x{4E8C}\x{6708})[a-z,\.;]*]]></text> |
| 18 | +</define> |
| 19 | + |
| 20 | +<define name="_allmonth" extract="litmonth, month"> <!-- Either month form: a non-capturing alternation of _litmonth and _month, so exactly one of the two extracted groups participates in a match. --> |
| 21 | + <text><![CDATA[(?:]]></text> |
| 22 | + <use name="_litmonth"/> |
| 23 | + <text><![CDATA[|]]></text> |
| 24 | + <use name="_month"/> |
| 25 | + <text><![CDATA[)]]></text> |
| 26 | +</define> |
| 27 | + |
| 28 | +<define name="_day" extract="day"> <!-- Day of month 1 to 31; a leading zero is optional for 1 to 9. --> |
| 29 | + <text><![CDATA[(0?[1-9]|[12]\d|3[01])]]></text> |
| 30 | +</define> |
| 31 | + |
| 32 | +<define name="_usday" extract="day"> <!-- _day followed by an optional ordinal suffix (st, nd, rd, th) or a single comma, period or semicolon; the suffix is not captured. --> |
| 33 | + <use name="_day"/> |
| 34 | + <text><![CDATA[(?:st|nd|rd|th|[,\.;])?]]></text> |
| 35 | +</define> |
| 36 | + |
| 37 | +<define name="_hour" extract="hour"> <!-- Hour 0 to 23: the first alternative covers 0 to 19 (leading 0 or 1 optional), the second adds 20 to 23 (and overlaps 00-03 and 10-13). Must not be followed by a digit. --> |
| 38 | + <text><![CDATA[([01]?[0-9]|[012][0-3])(?!\d)]]></text> |
| 39 | +</define> |
| 40 | + |
| 41 | +<define name="_minute" extract="minute"> <!-- Exactly two digits with the first in 0 to 6 (so 00 to 69 is accepted by the pattern), not followed by a digit. --> |
| 42 | + <text><![CDATA[([0-6]\d)(?!\d)]]></text> |
| 43 | +</define> |
| 44 | + |
| 45 | +<define name="_second" extract="second"> <!-- Exactly two digits with the first in 0 to 6 (so 00 to 69 is accepted by the pattern), not followed by a digit. Same expression as _minute. --> |
| 46 | + <text><![CDATA[([0-6]\d)(?!\d)]]></text> |
| 47 | +</define> |
| 48 | + |
| 49 | +<define name="_zone" extract="zone"> <!-- Optional time zone capture: either one of the listed abbreviations (bare GMT only when no sign follows it), or a numeric offset made of an optional GMT prefix, a sign, one or two digits, an optional colon and an optional two more digits. The zone must not be followed by a word character. The whole group is optional, so it matches the empty string when no zone is present. --> |
| 50 | + <text><![CDATA[((?:(?:UT|UTC|GMT(?![+-])|CET|CEST|CETDST|MET|MEST|METDST|MEZ|MESZ|EET|EEST|EETDST|WET|WEST|WETDST|MSK|MSD|IST|JST|KST|HKT|AST|ADT|EST|EDT|CST|CDT|MST|MDT|PST|PDT|CAST|CADT|EAST|EADT|WAST|WADT|Z)|(?:GMT)?[+-]\d\d?:?(?:\d\d)?)(?!\w))?]]></text> |
| 51 | +</define> |
| 52 | + |
| 53 | +<define name="_ampm" extract="ampm"> <!-- Optional AM/PM capture: am or pm followed by a non-alphanumeric character or end of input (that following character is part of the capture), or U+4E0A / U+4E0B followed by U+5348 (the Chinese morning/afternoon markers). --> |
| 54 | + <text><![CDATA[([ap]m(?:[^A-Za-z0-9]|$)|[\x{4E0A}\x{4E0B}]\x{5348})?]]></text> |
| 55 | +</define> |
| 56 | + |
| 57 | +<define name="_time" extract="hour, minute, second, subsecond, ampm, zone"> <!-- Full clock time hour:minute:second with optional subsecond, AM/PM marker and zone. Must not be preceded by a digit nor followed by a colon and a digit. --> |
| 58 | + <text><![CDATA[(?<!\d)]]></text> |
| 59 | + <use name="_hour"/> |
| 60 | + <text><![CDATA[:]]></text> |
| 61 | + <use name="_minute"/> |
| 62 | + <text><![CDATA[:]]></text> |
| 63 | + <use name="_second"/> |
| 64 | + <text><![CDATA[(?:(?: \d{4})?[:,\.](\d+))? {0,2}]]></text> <!-- optional subsecond: an optional space plus four digits, then one of : , . and the captured digits; up to two spaces may follow --> |
| 65 | + <use name="_ampm"/> |
| 66 | + <text><![CDATA[ {0,2}]]></text> |
| 67 | + <use name="_zone"/> |
| 68 | + <text><![CDATA[(?!:\d)]]></text> |
| 69 | +</define> |
| 70 | + |
| 71 | +<define name="_hmtime" extract="hour, minute, ampm"> <!-- Clock time without seconds: hour:minute, optionally followed by one space and an AM/PM marker (same expression as _ampm, written inline). Must not be preceded by a digit, and must not be followed by a colon that is itself followed by a colon or digit (presumably so hh:mm:ss input is left to _time; confirm). --> |
| 72 | + <text><![CDATA[(?<!\d)]]></text> |
| 73 | + <use name="_hour"/> |
| 74 | + <text><![CDATA[:]]></text> |
| 75 | + <use name="_minute"/> |
| 76 | + <text><![CDATA[(?: ([ap]m(?:[^A-Za-z0-9]|$)|[\x{4E0A}\x{4E0B}]\x{5348}))?(?!:[:\d])]]></text> |
| 77 | +</define> |
| 78 | + |
| 79 | + |
| 80 | +<define name="_dottime" extract="hour, minute, second, subsecond, zone"> <!-- Dot-separated clock time: a two-digit hour 00 to 23 (captured inline rather than via _hour), a period, _minute, an optional period, _second, then an optional uncaptured colon-or-comma fraction and an optional period plus four or more captured digits (subsecond), up to two spaces and _zone. Must not be preceded by a digit or period, nor followed by a digit or period. --> |
| 81 | + <text><![CDATA[(?<![\d\.])([01]\d|2[0-3])\.]]></text> |
| 82 | + <use name="_minute"/> |
| 83 | + <text><![CDATA[(?:\.?]]></text> |
| 84 | + <use name="_second"/> |
| 85 | + <text><![CDATA[(?:[:,]\d+)?(?:\.(\d\d\d\d+))?) {0,2}]]></text> |
| 86 | + <use name="_zone"/> |
| 87 | + <text><![CDATA[(?![0-9\.])]]></text> |
| 88 | +</define> |
| 89 | + |
| 90 | +<define name="_combdatetime" extract="year, month, day, hour, minute, second, subsecond"> |
| 91 | + <!-- ... 20060502-000002 GMT ... --> |
| 92 | + <text><![CDATA[(?<![\d\.])(20\d\d)(0\d|1[012])([012]\d|3[01])[.-]?([01]\d|2[0123])([0-6]\d)([0-6]\d)(?:\.?(\d+))?]]>\s*</text> |
| 93 | + <use name="_zone"/> |
| 94 | +</define> |
| 95 | + |
| 96 | +<define name="_combdatetime2" extract="year, ignored_sep, month, day, hour, minute, second, zone"> |
| 97 | + <!-- ... 2007-3-22 0:0:2 GMT ...' --> |
| 98 | + <!-- ... 2007/3/22 0:0:2 GMT ...' --> |
| 99 | + <text><![CDATA[(?<![\d\.])(20\d\d)([-/])([01]?\d)\2([012]?\d|3[01])\s+([012]?\d):([0-6]?\d):([0-6]?\d)]]>\s*</text> |
| 100 | + <use name="_zone"/> |
| 101 | +</define> |
| 102 | + |
| 103 | + |
| 104 | + |
| 105 | +<define name="_usdate" extract="litmonth, month, ignored_sep, day, zone, ignored_sep2, year"> |
| 106 | + <text><![CDATA[(?<!\w|\d[:\.\-])]]></text> |
| 107 | + <use name="_allmonth"/> |
| 108 | + <text><![CDATA[([/\- ]) {0,2}]]></text> |
| 109 | + <use name="_day"/> |
| 110 | + <text><![CDATA[(?!:) {0,2}(?:\d\d:\d\d:\d\d(?:[\.\,]\d+)? {0,2}]]></text> |
| 111 | + <use name="_zone"/> |
| 112 | + <text><![CDATA[)?((?:\3|,) {0,2}]]></text> |
| 113 | + <use name="_year"/> |
| 114 | + <text><![CDATA[)?(?!/|\w|\.\d)]]></text> |
| 115 | +</define> |
| 116 | + |
| 117 | +<!-- Jan 21, 09. allows spaces with litmonth only --> |
| 118 | +<define name="_usdate1" extract="litmonth, ignored_sep, day, zone, ignored_sep2, year"> <!-- Month-first date with a literal month: _litmonth, a separator (slash, hyphen or space, captured as group 2) plus up to two spaces, _day, an optional embedded hh:mm:ss time with optional fraction and _zone, then an optional year introduced by the same separator (\2) or a comma. --> |
| 119 | + <text><![CDATA[(?<!\w|\d[:\.\-])]]></text> |
| 120 | + <use name="_litmonth"/> |
| 121 | + <text><![CDATA[([/\- ]) {0,2}]]></text> |
| 122 | + <use name="_day"/> |
| 123 | + <text><![CDATA[(?!:) {0,2}(?:\d\d:\d\d:\d\d(?:[\.\,]\d+)? {0,2}]]></text> |
| 124 | + <use name="_zone"/> |
| 125 | + <text><![CDATA[)?((?:\2|,) {0,2}]]></text> |
| 126 | + <use name="_year"/> |
| 127 | + <text><![CDATA[)?(?!/|\w|\.\d)]]></text> |
| 128 | +</define> |
| 129 | + |
| 130 | +<!-- 10/21/09. doesn't allow spaces (e.g. 10 21 09) with numeric month --> |
| 131 | +<define name="_usdate2" extract="month, ignored_sep, day, zone, ignored_sep2, year"> <!-- Month-first date with a numeric month: _month, a slash or hyphen (captured as group 2, no spaces allowed), _day, an optional embedded hh:mm:ss time with optional fraction and _zone, then an optional year that must be introduced by the same separator (\2). --> |
| 132 | + <text><![CDATA[(?<!\w|\d[:\.\-])]]></text> |
| 133 | + <use name="_month"/> |
| 134 | + <text><![CDATA[([/\-])]]></text> |
| 135 | + <use name="_day"/> |
| 136 | + <text><![CDATA[(?!:)(?:\d\d:\d\d:\d\d(?:[\.\,]\d+)? {0,2}]]></text> |
| 137 | + <use name="_zone"/> |
| 138 | + <text><![CDATA[)?((?:\2)]]></text> |
| 139 | + <use name="_year"/> |
| 140 | + <text><![CDATA[)?(?!/|\w|\.\d)]]></text> |
| 141 | +</define> |
| 142 | + |
| 143 | + |
| 144 | +<define name="_isodate" extract="year, ignored_sep, litmonth, month, day"> <!-- Year-first date: _year, one separator (period, slash, hyphen or space, captured), _allmonth not followed by a digit, an optional separator with up to two spaces, then _day. The day must not be followed by a slash, and must be followed either by the letter T or by something that is neither a word character nor a period plus digit. Must not be preceded by a word character. --> |
| 145 | + <text><![CDATA[(?<![\w\d])]]></text> |
| 146 | + <use name="_year"/> |
| 147 | + <text><![CDATA[([\./\- ])]]></text> |
| 148 | + <use name="_allmonth"/> |
| 149 | + <text><![CDATA[(?!\d)(?:[\./\- ] {0,2})?]]></text> |
| 150 | + <use name="_day"/> |
| 151 | + <text><![CDATA[(?!/)(?:(?=T)|(?!\w)(?!\.\d))]]></text> |
| 152 | +</define> |
| 153 | + |
| 154 | +<!-- eurodate format. period/dot delim separated out to eurodate2 --> |
| 155 | +<define name="_eurodate1" extract="day, ignored_sep, litmonth, month, year"> <!-- Day-first date: _usday, a separator (hyphen, space or slash, captured as group 2) plus up to two spaces, _allmonth, the same separator again (\2) plus up to two spaces, then _year. Must be neither preceded nor followed by a word character or period. --> |
| 156 | + <text><![CDATA[(?<![\w\.])]]></text> |
| 157 | + <use name="_usday"/> |
| 158 | + <text><![CDATA[([\- /]) {0,2}]]></text> |
| 159 | + <use name="_allmonth"/> |
| 160 | + <text><![CDATA[\2 {0,2}]]></text> |
| 161 | + <use name="_year"/> |
| 162 | + <text><![CDATA[(?![\w\.])]]></text> |
| 163 | +</define> |
| 164 | + |
| 165 | +<!-- just period/dot delimiter. do not allow any spaces after dots (e.g. "version 5.4. 10") --> |
| 166 | +<define name="_eurodate2" extract="day, litmonth, month, year"> <!-- Day-first date delimited only by periods: _usday, a period, _allmonth, a period, _year, with no spaces permitted around the periods. Must be neither preceded nor followed by a word character or period. --> |
| 167 | + <text><![CDATA[(?<![\w\.])]]></text> |
| 168 | + <use name="_usday"/> |
| 169 | + <text><![CDATA[\.]]></text> |
| 170 | + <use name="_allmonth"/> |
| 171 | + <text><![CDATA[\.]]></text> |
| 172 | + <use name="_year"/> |
| 173 | + <text><![CDATA[(?![\w\.])]]></text> |
| 174 | +</define> |
| 175 | + |
| 176 | + |
| 177 | +<define name="_bareurlitdate" extract="day, litmonth, year"> <!-- Day, three-letter English month abbreviation and 20YY year, each pair separated by two literal pipe characters. The month list is written inline and has no Chinese alternatives, unlike _litmonth. --> |
| 178 | + <text><![CDATA[(\d\d?)\|\|(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\|\|(20\d\d)]]></text> |
| 179 | +</define> |
| 180 | + |
| 181 | +<define name="_orddate" extract="year, ord"> <!-- Five-digit token surrounded by whitespace: a two-digit year whose first digit is 0 or 1, then a three-digit value whose first digit is 0 to 3, extracted as ord (presumably the day of the year; confirm against the consumer). --> |
| 182 | + <text><![CDATA[\s([01]\d)([0123]\d\d)\s]]></text> |
| 183 | +</define> |
| 184 | + |
| 185 | +<!-- due to high number of false positive matches, this format is |
| 186 | + limited to special cases. either at the start of a line or in |
| 187 | + filename matches only, by prefixing with a "source::" --> |
| 188 | + |
| 189 | +<!-- don't allow multiple spaces after mashed date. indicates number in column --> |
| 190 | +<define name="_masheddate" extract="year, month, day"> <!-- Unseparated year-month-day: anchored at line start or after the literal source:: and then lazily skipping ahead, an optional 20 century prefix, a two-digit year starting with 9, 0, 1 or 2, two-digit month and two-digit day. Must not be preceded by a digit, a digit plus period, or a hyphen, nor followed by a digit, a hyphen, or two or more spaces. --> |
| 191 | + <text><![CDATA[(?:^|source::).*?(?<!\d|\d\.|-)(?:20)?([9012]\d)(0\d|1[012])([012]\d|3[01])(?!\d|-| {2,})]]></text> |
| 192 | +</define> |
| 193 | +<define name="_masheddate2" extract="month, day, year"> <!-- Unseparated month-day-year: anchored at line start or after the literal source:: and then lazily skipping ahead, two-digit month, two-digit day, an optional 20 century prefix and a two-digit year starting with 9, 0, 1 or 2. Must not be preceded by a digit or a digit plus period, nor followed by a digit or two or more spaces. --> |
| 194 | + <text><![CDATA[(?:^|source::).*?(?<!\d|\d\.)(0\d|1[012])([012]\d|3[01])(?:20)?([9012]\d)(?!\d| {2,})]]></text> |
| 195 | +</define> |
| 196 | + |
| 197 | +<define name="_utcepoch" extract="utcepoch, subsecond"> <!-- Epoch seconds: either a nine-digit number starting with 9 or a ten-digit number starting with 1, preceded by line start or one of the listed delimiter characters, or a line-leading @ followed by 16 to 24 hex digits. An optional period and 1 to 6 digits are captured as subsecond. Must not be followed by a digit or an opening parenthesis. --> |
| 198 | + <!-- the 1\d prefix accepts values up to 1999999999 (2033-05-18 UTC); the earlier 1[0123456] prefix stopped at 1699999999 (2023-11-14 UTC). update regex before 2033 --> |
| 199 | + <text><![CDATA[((?<=^|[\s#,"=\(\[\|\{])(?:1\d|9)\d{8}|^@[\da-fA-F]{16,24})(?:\.?(\d{1,6}))?(?![\d\(])]]></text> |
| 200 | +</define> |
| 201 | + |
| 202 | +<timePatterns> <!-- Time definitions, referenced by name; each definition is listed once. _combdatetime and _combdatetime2 carry both date and time fields and are also listed under datePatterns. --> |
| 203 | + <use name="_time"/> |
| 204 | + <use name="_hmtime"/> |
| 206 | + <use name="_dottime"/> |
| 207 | + <use name="_combdatetime"/> |
| 208 | + <use name="_utcepoch"/> |
| 209 | + <use name="_combdatetime2"/> |
| 210 | +</timePatterns> |
| 211 | +<datePatterns> <!-- Date definitions, referenced by name. _combdatetime and _combdatetime2 also appear under timePatterns because they carry both date and time fields. _usdate is defined in this file but is not listed here; _usdate1 and _usdate2 are used instead. --> |
| 212 | + <use name="_usdate1"/> |
| 213 | + <use name="_usdate2"/> |
| 214 | + <use name="_isodate"/> |
| 215 | + <use name="_eurodate1"/> |
| 216 | + <use name="_eurodate2"/> |
| 217 | + <use name="_bareurlitdate"/> |
| 218 | + <use name="_orddate"/> |
| 219 | + <use name="_combdatetime"/> |
| 220 | + <use name="_masheddate"/> |
| 221 | + <use name="_masheddate2"/> |
| 222 | + <use name="_combdatetime2"/> |
| 223 | +</datePatterns> |
| 224 | + |
| 225 | +</datetime> |
0 commit comments