|
| 1 | +const std = @import("std"); |
| 2 | +const Completion = @import("Ast.zig").Completion; |
| 3 | +const Registry = @import("language_tag/parse.zig").Registry; |
| 4 | +const registry: Registry = @import("language-tag-registry"); |
| 5 | + |
/// Describes why a language tag was rejected and which span of the input
/// is at fault.
pub const Rejection = struct {
    /// Explanation of the failure.
    reason: []const u8,
    /// Byte distance from the start of the validated tag to the offending subtag.
    offset: u32,
    /// Byte length of the offending subtag.
    length: u32,

    /// Builds a rejection for `subtag`.
    ///
    /// The offset is computed as the difference between the two slice
    /// pointers, so `subtag` is expected to point into `bytes` (at or after
    /// its start); otherwise the subtraction or the cast to `u32` is invalid.
    pub fn init(bytes: []const u8, subtag: []const u8, reason: []const u8) Rejection {
        const tag_address = @intFromPtr(bytes.ptr);
        const subtag_address = @intFromPtr(subtag.ptr);
        return Rejection{
            .reason = reason,
            .offset = @intCast(subtag_address - tag_address),
            .length = @intCast(subtag.len),
        };
    }
};
| 19 | + |
/// Validates `bytes` as a language tag, checking each subtag against the
/// registry lookup tables in `maps`.
///
/// Returns `null` when the tag is accepted. Otherwise returns a `Rejection`
/// whose `offset`/`length` locate the offending subtag inside `bytes` and
/// whose `reason` explains the failure.
pub fn validate(bytes: []const u8) ?Rejection {
    // A non-deprecated grandfathered tag is accepted as a whole. A deprecated
    // one falls through to the regular subtag parser below.
    if (maps.grandfathered.get(bytes)) |data| {
        if (!data.is_deprecated) return null;
    }

    // States follow the order in which subtags may appear:
    // language, extlang, script, region, variants, extensions, private use.
    const ParseState = enum {
        language,
        extlang,
        script,
        region,
        variant,
        singleton,
        extension,
        extension_extra,
        privateuse,
        privateuse_extra,
    };
    var parse_state: ParseState = .language;

    // Remembers the most recent subtag so that a tag ending on a dangling
    // singleton can be reported at the right position after the loop.
    var last_subtag: []const u8 = bytes[0..0];

    var subtags = std.mem.splitScalar(u8, bytes, '-');
    while (subtags.next()) |subtag| {
        last_subtag = subtag;
        // `continue :state X` re-dispatches the *same* subtag under state X;
        // assigning `parse_state` selects the state for the *next* subtag.
        state: switch (parse_state) {
            .language => switch (subtag.len) {
                0 => return .init(bytes, subtag, "cannot be empty"),
                1 => return .init(bytes, subtag, "too short"),
                2...8 => {
                    if (maps.language.get(subtag)) |data| {
                        if (data.is_deprecated) return .init(bytes, subtag, "deprecated language");
                    } else {
                        return .init(bytes, subtag, "unknown language");
                    }
                    parse_state = .extlang;
                },
                else => return .init(bytes, subtag, "too long"),
            },
            .extlang => switch (subtag.len) {
                3 => {
                    // Three characters starting with a digit are handled as a region.
                    if (std.ascii.isDigit(subtag[0])) continue :state .region;
                    if (maps.extlang.get(subtag)) |data| {
                        if (data.is_deprecated) return .init(bytes, subtag, "deprecated language extension");
                        for (data.prefixes) |prefix| {
                            if (startsWithSubtags(bytes, prefix)) break;
                        } else {
                            return .init(bytes, subtag, "incompatible language extension");
                        }
                    } else {
                        return .init(bytes, subtag, "unknown language extension");
                    }
                    parse_state = .script;
                },
                else => continue :state .script,
            },
            .script => switch (subtag.len) {
                4 => {
                    // Four characters starting with a digit are handled as a variant.
                    if (std.ascii.isDigit(subtag[0])) continue :state .variant;
                    if (!maps.script.has(subtag)) {
                        return .init(bytes, subtag, "unknown language script");
                    }
                    parse_state = .region;
                },
                else => continue :state .region,
            },
            .region => switch (subtag.len) {
                2...3 => {
                    if (maps.region.get(subtag)) |data| {
                        if (data.is_deprecated) return .init(bytes, subtag, "deprecated language region");
                    } else {
                        return .init(bytes, subtag, "unknown language region");
                    }
                    parse_state = .variant;
                },
                else => continue :state .variant,
            },
            .variant => switch (subtag.len) {
                4...8 => {
                    if (maps.variant.get(subtag)) |data| {
                        if (data.is_deprecated) return .init(bytes, subtag, "deprecated language variant");
                        // A variant registered without any prefix is not tied to
                        // a particular language, so only enforce the prefix
                        // check when the registry lists at least one.
                        if (data.prefixes.len != 0) {
                            for (data.prefixes) |prefix| {
                                if (startsWithSubtags(bytes, prefix)) break;
                            } else {
                                return .init(bytes, subtag, "incompatible language variant");
                            }
                        }
                    } else {
                        return .init(bytes, subtag, "unknown language variant");
                    }
                    // Stay in this state: several variants may follow each other.
                    parse_state = .variant;
                },
                else => continue :state .singleton,
            },
            .singleton => {
                if (subtag.len != 1) {
                    return .init(bytes, subtag, "extension prefix must be a single character");
                }
                parse_state = switch (std.ascii.toLower(subtag[0])) {
                    'x' => .privateuse,
                    'a'...'w', 'y'...'z', '0'...'9' => .extension,
                    else => return .init(bytes, subtag, "extension prefix must be alphanumeric"),
                };
            },
            .extension => switch (subtag.len) {
                2...8 => {
                    for (subtag) |char| if (!std.ascii.isAlphanumeric(char)) {
                        return .init(bytes, subtag, "extension must be alphanumeric");
                    };
                    parse_state = .extension_extra;
                },
                else => return .init(bytes, subtag, "wrong extension length"),
            },
            .extension_extra => switch (subtag.len) {
                // Another subtag of the current extension, or the next singleton.
                2...8 => continue :state .extension,
                else => continue :state .singleton,
            },
            .privateuse => switch (subtag.len) {
                1...8 => {
                    for (subtag) |char| if (!std.ascii.isAlphanumeric(char)) {
                        return .init(bytes, subtag, "private use extension must be alphanumeric");
                    };
                    parse_state = .privateuse_extra;
                },
                else => return .init(bytes, subtag, "wrong private use extension length"),
            },
            .privateuse_extra => switch (subtag.len) {
                1...8 => continue :state .privateuse,
                else => return .init(bytes, subtag, "subtag after private use extension"),
            },
        }
    }

    // Ending in `.extension` / `.privateuse` means the last subtag was a
    // singleton with nothing after it (e.g. "en-a" or "en-x"), which is not
    // a well-formed tag.
    if (parse_state == .extension) {
        return .init(bytes, last_subtag, "extension prefix must be followed by a subtag");
    }
    if (parse_state == .privateuse) {
        return .init(bytes, last_subtag, "private use prefix must be followed by a subtag");
    }
    return null;
}

/// Reports whether `tag` starts with `prefix` (ASCII case-insensitive) and the
/// match ends on a subtag boundary, i.e. at the end of `tag` or right before
/// a '-'. This keeps prefix "sl" from matching a tag that begins with "slk-".
fn startsWithSubtags(tag: []const u8, prefix: []const u8) bool {
    if (!std.ascii.startsWithIgnoreCase(tag, prefix)) return false;
    return tag.len == prefix.len or tag[prefix.len] == '-';
}
| 147 | + |
/// Suggests completions for a partially typed language tag.
///
/// - An empty `value` yields every entry of `language_completions`.
/// - A `value` ending in '-' yields every entry of `region_completions`.
/// - Anything else yields an empty slice.
pub fn completions(value: []const u8) []const Completion {
    if (value.len == 0) {
        return &language_completions;
    }

    if (std.mem.endsWith(u8, value, "-")) {
        return &region_completions;
    }

    return &.{};
}
| 159 | + |
/// Static string map from subtag name to its registry data; keys are compared
/// with `std.ascii.eqlIgnoreCase`.
const Map = std.StaticStringMapWithEql(
    Registry.Subtag.Data,
    std.ascii.eqlIgnoreCase,
);

/// One lookup table per subtag kind, each produced by `makeMap` from the
/// identically named field of `registry`.
const maps = struct {
    pub const language: Map = makeMap("language");
    pub const extlang: Map = makeMap("extlang");
    pub const script: Map = makeMap("script");
    pub const region: Map = makeMap("region");
    pub const variant: Map = makeMap("variant");
    pub const grandfathered: Map = makeMap("grandfathered");
};
| 170 | + |
/// Builds the lookup table for the `registry` field named `kind`, keyed by
/// each entry's `name` and holding its `data`.
fn makeMap(comptime kind: []const u8) Map {
    const entries = @field(registry, kind);
    // Scale the comptime branch budget with the number of entries copied below.
    @setEvalBranchQuota(entries.len * 2);

    const Pair = struct { []const u8, Registry.Subtag.Data };
    var pairs: [entries.len]Pair = undefined;
    for (entries, 0..) |entry, index| {
        pairs[index] = .{ entry.name, entry.data };
    }
    return Map.initComptime(pairs);
}
| 181 | + |
/// Completion entries for every language subtag in `registry`.
const language_completions = makeCompletions("language");
/// Completion entries for every region subtag in `registry`.
const region_completions = makeCompletions("region");

/// Builds one `Completion` per entry of the `registry` field named `kind`.
/// The label is the subtag name; the description falls back to the name when
/// the entry has no description.
fn makeCompletions(comptime kind: []const u8) [@field(registry, kind).len]Completion {
    const entries = @field(registry, kind);
    // Scale the comptime branch budget with the number of entries copied below.
    @setEvalBranchQuota(entries.len * 2);

    var result: [entries.len]Completion = undefined;
    for (entries, 0..) |entry, index| {
        const name = entry.name;
        result[index] = .{
            .label = name,
            .desc = entry.data.description orelse name,
        };
    }
    return result;
}
| 197 | + |
test "validate: all subtags" {
    // One tag that walks through language, extlang, script, region, variant,
    // extension and private use.
    const rejection = validate("sgn-ase-Latn-US-blasl-a-abcd-x-1234");
    try std.testing.expectEqual(null, rejection);
}
| 202 | + |
test "validate: deprecated language" {
    // "in" is expected to be rejected (per the test name, as a deprecated
    // language subtag in the registry).
    const rejection = validate("in");
    try std.testing.expect(rejection != null);
}
| 207 | + |
test "validate: multiple prefixes" {
    // The "blasl" variant must be accepted under each of these prefixes...
    const accepted = [_][]const u8{ "sgn-ase-blasl", "ase-blasl" };
    for (accepted) |tag| {
        try std.testing.expectEqual(null, validate(tag));
    }

    // ...and rejected under a language that is not one of them.
    try std.testing.expect(validate("it-blasl") != null);
}
0 commit comments