Skip to content

Commit 878dbd8

Browse files
authored
Merge pull request #901 from lightpanda-io/url_stitch
Rework URL.stitch, handle ../ (for yahoo)
2 parents 3c64ed1 + 1af2513 commit 878dbd8

File tree

1 file changed

+172
-123
lines changed

1 file changed

+172
-123
lines changed

src/url.zig

Lines changed: 172 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -100,52 +100,72 @@ pub const URL = struct {
100100
/// For URLs without a path, the supplied relative path is used as the path.
101101
pub fn stitch(
102102
allocator: Allocator,
103-
src: []const u8,
103+
path: []const u8,
104104
base: []const u8,
105105
opts: StitchOpts,
106106
) ![]const u8 {
107-
if (base.len == 0 or isURL(src)) {
107+
if (base.len == 0 or isComleteHTTPUrl(path)) {
108108
if (opts.alloc == .always) {
109-
return allocator.dupe(u8, src);
109+
return allocator.dupe(u8, path);
110110
}
111-
return src;
111+
return path;
112112
}
113113

114-
var normalized_src = src;
115-
while (std.mem.startsWith(u8, normalized_src, "./")) {
116-
normalized_src = normalized_src[2..];
117-
}
118-
119-
if (normalized_src.len == 0) {
114+
if (path.len == 0) {
120115
if (opts.alloc == .always) {
121116
return allocator.dupe(u8, base);
122117
}
123118
return base;
124119
}
125120

126-
const protocol_end: usize = blk: {
127-
if (std.mem.indexOf(u8, base, "://")) |protocol_index| {
128-
break :blk protocol_index + 3;
129-
} else {
130-
break :blk 0;
131-
}
132-
};
121+
// Quick hack because domains have to be at least 3 characters.
122+
// Given https://a.b this will point to 'a'
123+
// Given http://a.b this will point to '.'
124+
// Either way, we just care about this value to find the start of the path
125+
const protocol_end: usize = if (isComleteHTTPUrl(base)) 8 else 0;
133126

134-
if (normalized_src[0] == '/') {
135-
if (std.mem.indexOfScalarPos(u8, base, protocol_end, '/')) |pos| {
136-
return std.fmt.allocPrint(allocator, "{s}{s}", .{ base[0..pos], normalized_src });
137-
}
138-
// not sure what to do here...error? Just let it fallthrough for now.
127+
if (path[0] == '/') {
128+
const pos = std.mem.indexOfScalarPos(u8, base, protocol_end, '/') orelse base.len;
129+
return std.fmt.allocPrint(allocator, "{s}{s}", .{ base[0..pos], path });
139130
}
140131

141-
if (std.mem.lastIndexOfScalar(u8, base[protocol_end..], '/')) |index| {
142-
const last_slash_pos = index + protocol_end;
143-
if (last_slash_pos == base.len - 1) {
144-
return std.fmt.allocPrint(allocator, "{s}{s}", .{ base, normalized_src });
132+
var normalized_base = base;
133+
if (std.mem.lastIndexOfScalar(u8, base[protocol_end..], '/')) |pos| {
134+
normalized_base = base[0 .. pos + protocol_end];
135+
}
136+
137+
var out = try std.fmt.allocPrint(allocator, "{s}/{s}", .{
138+
normalized_base,
139+
path,
140+
});
141+
142+
// Strip out ./ and ../. This is done in-place, because doing so can
143+
// only ever make `out` smaller. After this, `out` cannot be freed by
144+
// an allocator, which is ok, because we expect allocator to be an arena.
145+
var in_i: usize = 0;
146+
var out_i: usize = 0;
147+
while (in_i < out.len) {
148+
if (std.mem.startsWith(u8, out[in_i..], "./")) {
149+
in_i += 2;
150+
continue;
145151
}
146-
return std.fmt.allocPrint(allocator, "{s}/{s}", .{ base[0..last_slash_pos], normalized_src });
152+
if (std.mem.startsWith(u8, out[in_i..], "../")) {
153+
std.debug.assert(out[out_i - 1] == '/');
154+
155+
out_i -= 2;
156+
while (out_i > 1 and out[out_i - 1] != '/') {
157+
out_i -= 1;
158+
}
159+
// <= to deal with the hack-ish protocol_end which will be off-by-one between http and https
160+
if (out_i <= protocol_end) return error.InvalidURL;
161+
in_i += 3;
162+
continue;
163+
}
164+
out[out_i] = out[in_i];
165+
in_i += 1;
166+
out_i += 1;
147167
}
148-
return std.fmt.allocPrint(allocator, "{s}/{s}", .{ base, normalized_src });
168+
return out[0..out_i];
149169
}
150170

151171
pub fn concatQueryString(arena: Allocator, url: []const u8, query_string: []const u8) ![]const u8 {
@@ -174,7 +194,7 @@ pub const URL = struct {
174194
}
175195
};
176196

177-
fn isURL(url: []const u8) bool {
197+
fn isComleteHTTPUrl(url: []const u8) bool {
178198
if (std.mem.startsWith(u8, url, "://")) {
179199
return true;
180200
}
@@ -195,17 +215,17 @@ fn isURL(url: []const u8) bool {
195215
}
196216

197217
const testing = @import("testing.zig");
198-
test "URL: isURL" {
199-
try testing.expectEqual(true, isURL("://lightpanda.io"));
200-
try testing.expectEqual(true, isURL("://lightpanda.io/about"));
201-
try testing.expectEqual(true, isURL("http://lightpanda.io/about"));
202-
try testing.expectEqual(true, isURL("HttP://lightpanda.io/about"));
203-
try testing.expectEqual(true, isURL("httpS://lightpanda.io/about"));
204-
try testing.expectEqual(true, isURL("HTTPs://lightpanda.io/about"));
205-
206-
try testing.expectEqual(false, isURL("/lightpanda.io"));
207-
try testing.expectEqual(false, isURL("../../about"));
208-
try testing.expectEqual(false, isURL("about"));
218+
test "URL: isComleteHTTPUrl" {
219+
try testing.expectEqual(true, isComleteHTTPUrl("://lightpanda.io"));
220+
try testing.expectEqual(true, isComleteHTTPUrl("://lightpanda.io/about"));
221+
try testing.expectEqual(true, isComleteHTTPUrl("http://lightpanda.io/about"));
222+
try testing.expectEqual(true, isComleteHTTPUrl("HttP://lightpanda.io/about"));
223+
try testing.expectEqual(true, isComleteHTTPUrl("httpS://lightpanda.io/about"));
224+
try testing.expectEqual(true, isComleteHTTPUrl("HTTPs://lightpanda.io/about"));
225+
226+
try testing.expectEqual(false, isComleteHTTPUrl("/lightpanda.io"));
227+
try testing.expectEqual(false, isComleteHTTPUrl("../../about"));
228+
try testing.expectEqual(false, isComleteHTTPUrl("about"));
209229
}
210230

211231
test "URL: resolve size" {
@@ -224,93 +244,122 @@ test "URL: resolve size" {
224244
try std.testing.expectEqualStrings(out_url.raw[26..], &url_string);
225245
}
226246

227-
test "URL: Stitching Base & Src URLs (Basic)" {
228-
const allocator = testing.allocator;
229-
230-
const base = "https://lightpanda.io/xyz/abc/123";
231-
const src = "something.js";
232-
const result = try URL.stitch(allocator, src, base, .{});
233-
defer allocator.free(result);
234-
try testing.expectString("https://lightpanda.io/xyz/abc/something.js", result);
235-
}
236-
237-
test "URL: Stitching Base & Src URLs (Just Ending Slash)" {
238-
const allocator = testing.allocator;
239-
240-
const base = "https://lightpanda.io/";
241-
const src = "something.js";
242-
const result = try URL.stitch(allocator, src, base, .{});
243-
defer allocator.free(result);
244-
try testing.expectString("https://lightpanda.io/something.js", result);
245-
}
246-
247-
test "URL: Stitching Base & Src URLs with leading slash" {
248-
const allocator = testing.allocator;
249-
250-
const base = "https://lightpanda.io/";
251-
const src = "/something.js";
252-
const result = try URL.stitch(allocator, src, base, .{});
253-
defer allocator.free(result);
254-
try testing.expectString("https://lightpanda.io/something.js", result);
255-
}
256-
257-
test "URL: Stitching Base & Src URLs (No Ending Slash)" {
258-
const allocator = testing.allocator;
259-
260-
const base = "https://lightpanda.io";
261-
const src = "something.js";
262-
const result = try URL.stitch(allocator, src, base, .{});
263-
defer allocator.free(result);
264-
try testing.expectString("https://lightpanda.io/something.js", result);
265-
}
266-
267-
test "URL: Stitching Base with absolute src" {
268-
const allocator = testing.allocator;
269-
270-
const base = "https://lightpanda.io/hello";
271-
const src = "/abc/something.js";
272-
const result = try URL.stitch(allocator, src, base, .{});
273-
defer allocator.free(result);
274-
try testing.expectString("https://lightpanda.io/abc/something.js", result);
275-
}
276-
277-
test "URL: Stiching Base & Src URLs (Both Local)" {
278-
const allocator = testing.allocator;
279-
280-
const base = "./abcdef/123.js";
281-
const src = "something.js";
282-
const result = try URL.stitch(allocator, src, base, .{});
283-
defer allocator.free(result);
284-
try testing.expectString("./abcdef/something.js", result);
285-
}
286-
287-
test "URL: Stiching src as full path" {
288-
const allocator = testing.allocator;
289-
290-
const base = "https://www.lightpanda.io/";
291-
const src = "https://lightpanda.io/something.js";
292-
const result = try URL.stitch(allocator, src, base, .{ .alloc = .if_needed });
293-
try testing.expectString("https://lightpanda.io/something.js", result);
294-
}
247+
test "URL: stitch" {
248+
defer testing.reset();
295249

296-
test "URL: Stitching Base & Src URLs (empty src)" {
297-
const allocator = testing.allocator;
250+
const Case = struct {
251+
base: []const u8,
252+
path: []const u8,
253+
expected: []const u8,
254+
};
298255

299-
const base = "https://lightpanda.io/xyz/abc/123";
300-
const src = "";
301-
const result = try URL.stitch(allocator, src, base, .{});
302-
defer allocator.free(result);
303-
try testing.expectString("https://lightpanda.io/xyz/abc/123", result);
304-
}
256+
const cases = [_]Case{
257+
.{
258+
.base = "https://lightpanda.io/xyz/abc/123",
259+
.path = "something.js",
260+
.expected = "https://lightpanda.io/xyz/abc/something.js",
261+
},
262+
.{
263+
.base = "https://lightpanda.io/xyz/abc/123",
264+
.path = "/something.js",
265+
.expected = "https://lightpanda.io/something.js",
266+
},
267+
.{
268+
.base = "https://lightpanda.io/",
269+
.path = "something.js",
270+
.expected = "https://lightpanda.io/something.js",
271+
},
272+
.{
273+
.base = "https://lightpanda.io/",
274+
.path = "/something.js",
275+
.expected = "https://lightpanda.io/something.js",
276+
},
277+
.{
278+
.base = "https://lightpanda.io",
279+
.path = "something.js",
280+
.expected = "https://lightpanda.io/something.js",
281+
},
282+
.{
283+
.base = "https://lightpanda.io",
284+
.path = "abc/something.js",
285+
.expected = "https://lightpanda.io/abc/something.js",
286+
},
287+
.{
288+
.base = "https://lightpanda.io/nested",
289+
.path = "abc/something.js",
290+
.expected = "https://lightpanda.io/abc/something.js",
291+
},
292+
.{
293+
.base = "https://lightpanda.io/nested/",
294+
.path = "abc/something.js",
295+
.expected = "https://lightpanda.io/nested/abc/something.js",
296+
},
297+
.{
298+
.base = "https://lightpanda.io/nested/",
299+
.path = "/abc/something.js",
300+
.expected = "https://lightpanda.io/abc/something.js",
301+
},
302+
.{
303+
.base = "https://lightpanda.io/nested/",
304+
.path = "http://www.github.com/lightpanda-io/",
305+
.expected = "http://www.github.com/lightpanda-io/",
306+
},
307+
.{
308+
.base = "https://lightpanda.io/nested/",
309+
.path = "",
310+
.expected = "https://lightpanda.io/nested/",
311+
},
312+
.{
313+
.base = "https://lightpanda.io/abc/aaa",
314+
.path = "./hello/./world",
315+
.expected = "https://lightpanda.io/abc/hello/world",
316+
},
317+
.{
318+
.base = "https://lightpanda.io/abc/aaa/",
319+
.path = "../hello",
320+
.expected = "https://lightpanda.io/abc/hello",
321+
},
322+
.{
323+
.base = "https://lightpanda.io/abc/aaa",
324+
.path = "../hello",
325+
.expected = "https://lightpanda.io/hello",
326+
},
327+
.{
328+
.base = "https://lightpanda.io/abc/aaa/",
329+
.path = "./.././.././hello",
330+
.expected = "https://lightpanda.io/hello",
331+
},
332+
.{
333+
.base = "some/page",
334+
.path = "hello",
335+
.expected = "some/hello",
336+
},
337+
.{
338+
.base = "some/page/",
339+
.path = "hello",
340+
.expected = "some/page/hello",
341+
},
342+
343+
.{
344+
.base = "some/page/other",
345+
.path = ".././hello",
346+
.expected = "some/hello",
347+
},
348+
};
305349

306-
test "URL: Stitching dotslash" {
307-
const allocator = testing.allocator;
350+
for (cases) |case| {
351+
const result = try stitch(testing.arena_allocator, case.path, case.base, .{});
352+
try testing.expectString(case.expected, result);
353+
}
308354

309-
const base = "https://lightpanda.io/hello/";
310-
const src = "./something.js";
311-
const result = try URL.stitch(allocator, src, base, .{});
312-
defer allocator.free(result);
313-
try testing.expectString("https://lightpanda.io/hello/something.js", result);
355+
try testing.expectError(
356+
error.InvalidURL,
357+
stitch(testing.arena_allocator, "../hello", "https://lightpanda.io/", .{}),
358+
);
359+
try testing.expectError(
360+
error.InvalidURL,
361+
stitch(testing.arena_allocator, "../hello", "http://lightpanda.io/", .{}),
362+
);
314363
}
315364

316365
test "URL: concatQueryString" {

0 commit comments

Comments
 (0)