Skip to content

Commit 16a1677

Browse files
committed
Rework URL.stitch, handle ../ (for yahoo)
Also handle ./ anywhere in the path.
1 parent 5e74e17 commit 16a1677

File tree

1 file changed

+179
-123
lines changed

1 file changed

+179
-123
lines changed

src/url.zig

Lines changed: 179 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -100,52 +100,79 @@ pub const URL = struct {
100100
/// For URLs without a path, it will add src as the path.
101101
pub fn stitch(
102102
allocator: Allocator,
103-
src: []const u8,
103+
path: []const u8,
104104
base: []const u8,
105105
opts: StitchOpts,
106106
) ![]const u8 {
107-
if (base.len == 0 or isURL(src)) {
107+
if (base.len == 0 or isComleteHTTPUrl(path)) {
108108
if (opts.alloc == .always) {
109-
return allocator.dupe(u8, src);
109+
return allocator.dupe(u8, path);
110110
}
111-
return src;
111+
return path;
112112
}
113113

114-
var normalized_src = src;
115-
while (std.mem.startsWith(u8, normalized_src, "./")) {
116-
normalized_src = normalized_src[2..];
117-
}
118-
119-
if (normalized_src.len == 0) {
114+
if (path.len == 0) {
120115
if (opts.alloc == .always) {
121116
return allocator.dupe(u8, base);
122117
}
123118
return base;
124119
}
125120

126-
const protocol_end: usize = blk: {
127-
if (std.mem.indexOf(u8, base, "://")) |protocol_index| {
128-
break :blk protocol_index + 3;
129-
} else {
130-
break :blk 0;
131-
}
132-
};
121+
// Quick hack because domains have to be at least 3 characters.
122+
// Given https://a.b this will point to 'a'
123+
// Given http://a.b this will point to '.'
124+
// Either way, we just care about this value to find the start of the path
125+
const protocol_end: usize = if (isComleteHTTPUrl(base)) 8 else 0;
133126

134-
if (normalized_src[0] == '/') {
135-
if (std.mem.indexOfScalarPos(u8, base, protocol_end, '/')) |pos| {
136-
return std.fmt.allocPrint(allocator, "{s}{s}", .{ base[0..pos], normalized_src });
137-
}
138-
// not sure what to do here...error? Just let it fallthrough for now.
127+
if (path[0] == '/') {
128+
const pos = std.mem.indexOfScalarPos(u8, base, protocol_end, '/') orelse base.len;
129+
return std.fmt.allocPrint(allocator, "{s}{s}", .{ base[0..pos], path });
139130
}
140131

141-
if (std.mem.lastIndexOfScalar(u8, base[protocol_end..], '/')) |index| {
142-
const last_slash_pos = index + protocol_end;
143-
if (last_slash_pos == base.len - 1) {
144-
return std.fmt.allocPrint(allocator, "{s}{s}", .{ base, normalized_src });
132+
var normalized_base = base;
133+
if (std.mem.lastIndexOfScalar(u8, base[protocol_end..], '/')) |pos| {
134+
normalized_base = base[0 .. pos + protocol_end];
135+
}
136+
137+
var out = try std.fmt.allocPrint(allocator, "{s}/{s}", .{
138+
normalized_base,
139+
path,
140+
});
141+
142+
// Strip out ./ and ../. This is done in-place, because doing so can
143+
// only ever make `out` smaller. After this, `out` cannot be freed by
144+
// an allocator, which is ok, because we expect allocator to be an arena.
145+
var in_i: usize = 0;
146+
var out_i: usize = 0;
147+
while (in_i < out.len) {
148+
if (std.mem.startsWith(u8, out[in_i..], "./")) {
149+
in_i += 2;
150+
continue;
145151
}
146-
return std.fmt.allocPrint(allocator, "{s}/{s}", .{ base[0..last_slash_pos], normalized_src });
152+
if (std.mem.startsWith(u8, out[in_i..], "../")) {
153+
std.debug.assert(out[out_i - 1] == '/');
154+
155+
out_i -= 2;
156+
while (out_i > 1) {
157+
const next = out_i - 1;
158+
if (out[next] == '/') {
159+
// <= to deal with the hack-ish protocol_end which will be
160+
// off-by-one between http and https
161+
if (out_i <= protocol_end) {
162+
return error.InvalidURL;
163+
}
164+
break;
165+
}
166+
out_i = next;
167+
}
168+
in_i += 3;
169+
continue;
170+
}
171+
out[out_i] = out[in_i];
172+
in_i += 1;
173+
out_i += 1;
147174
}
148-
return std.fmt.allocPrint(allocator, "{s}/{s}", .{ base, normalized_src });
175+
return out[0..out_i];
149176
}
150177

151178
pub fn concatQueryString(arena: Allocator, url: []const u8, query_string: []const u8) ![]const u8 {
@@ -174,7 +201,7 @@ pub const URL = struct {
174201
}
175202
};
176203

177-
fn isURL(url: []const u8) bool {
204+
fn isComleteHTTPUrl(url: []const u8) bool {
178205
if (std.mem.startsWith(u8, url, "://")) {
179206
return true;
180207
}
@@ -195,17 +222,17 @@ fn isURL(url: []const u8) bool {
195222
}
196223

197224
const testing = @import("testing.zig");
198-
test "URL: isURL" {
199-
try testing.expectEqual(true, isURL("://lightpanda.io"));
200-
try testing.expectEqual(true, isURL("://lightpanda.io/about"));
201-
try testing.expectEqual(true, isURL("http://lightpanda.io/about"));
202-
try testing.expectEqual(true, isURL("HttP://lightpanda.io/about"));
203-
try testing.expectEqual(true, isURL("httpS://lightpanda.io/about"));
204-
try testing.expectEqual(true, isURL("HTTPs://lightpanda.io/about"));
205-
206-
try testing.expectEqual(false, isURL("/lightpanda.io"));
207-
try testing.expectEqual(false, isURL("../../about"));
208-
try testing.expectEqual(false, isURL("about"));
225+
test "URL: isComleteHTTPUrl" {
226+
try testing.expectEqual(true, isComleteHTTPUrl("://lightpanda.io"));
227+
try testing.expectEqual(true, isComleteHTTPUrl("://lightpanda.io/about"));
228+
try testing.expectEqual(true, isComleteHTTPUrl("http://lightpanda.io/about"));
229+
try testing.expectEqual(true, isComleteHTTPUrl("HttP://lightpanda.io/about"));
230+
try testing.expectEqual(true, isComleteHTTPUrl("httpS://lightpanda.io/about"));
231+
try testing.expectEqual(true, isComleteHTTPUrl("HTTPs://lightpanda.io/about"));
232+
233+
try testing.expectEqual(false, isComleteHTTPUrl("/lightpanda.io"));
234+
try testing.expectEqual(false, isComleteHTTPUrl("../../about"));
235+
try testing.expectEqual(false, isComleteHTTPUrl("about"));
209236
}
210237

211238
test "URL: resolve size" {
@@ -224,93 +251,122 @@ test "URL: resolve size" {
224251
try std.testing.expectEqualStrings(out_url.raw[26..], &url_string);
225252
}
226253

227-
test "URL: Stitching Base & Src URLs (Basic)" {
228-
const allocator = testing.allocator;
229-
230-
const base = "https://lightpanda.io/xyz/abc/123";
231-
const src = "something.js";
232-
const result = try URL.stitch(allocator, src, base, .{});
233-
defer allocator.free(result);
234-
try testing.expectString("https://lightpanda.io/xyz/abc/something.js", result);
235-
}
236-
237-
test "URL: Stitching Base & Src URLs (Just Ending Slash)" {
238-
const allocator = testing.allocator;
239-
240-
const base = "https://lightpanda.io/";
241-
const src = "something.js";
242-
const result = try URL.stitch(allocator, src, base, .{});
243-
defer allocator.free(result);
244-
try testing.expectString("https://lightpanda.io/something.js", result);
245-
}
246-
247-
test "URL: Stitching Base & Src URLs with leading slash" {
248-
const allocator = testing.allocator;
249-
250-
const base = "https://lightpanda.io/";
251-
const src = "/something.js";
252-
const result = try URL.stitch(allocator, src, base, .{});
253-
defer allocator.free(result);
254-
try testing.expectString("https://lightpanda.io/something.js", result);
255-
}
256-
257-
test "URL: Stitching Base & Src URLs (No Ending Slash)" {
258-
const allocator = testing.allocator;
259-
260-
const base = "https://lightpanda.io";
261-
const src = "something.js";
262-
const result = try URL.stitch(allocator, src, base, .{});
263-
defer allocator.free(result);
264-
try testing.expectString("https://lightpanda.io/something.js", result);
265-
}
266-
267-
test "URL: Stitching Base with absolute src" {
268-
const allocator = testing.allocator;
269-
270-
const base = "https://lightpanda.io/hello";
271-
const src = "/abc/something.js";
272-
const result = try URL.stitch(allocator, src, base, .{});
273-
defer allocator.free(result);
274-
try testing.expectString("https://lightpanda.io/abc/something.js", result);
275-
}
276-
277-
test "URL: Stiching Base & Src URLs (Both Local)" {
278-
const allocator = testing.allocator;
279-
280-
const base = "./abcdef/123.js";
281-
const src = "something.js";
282-
const result = try URL.stitch(allocator, src, base, .{});
283-
defer allocator.free(result);
284-
try testing.expectString("./abcdef/something.js", result);
285-
}
286-
287-
test "URL: Stiching src as full path" {
288-
const allocator = testing.allocator;
289-
290-
const base = "https://www.lightpanda.io/";
291-
const src = "https://lightpanda.io/something.js";
292-
const result = try URL.stitch(allocator, src, base, .{ .alloc = .if_needed });
293-
try testing.expectString("https://lightpanda.io/something.js", result);
294-
}
254+
test "URL: stitch" {
255+
defer testing.reset();
295256

296-
test "URL: Stitching Base & Src URLs (empty src)" {
297-
const allocator = testing.allocator;
257+
const Case = struct {
258+
base: []const u8,
259+
path: []const u8,
260+
expected: []const u8,
261+
};
298262

299-
const base = "https://lightpanda.io/xyz/abc/123";
300-
const src = "";
301-
const result = try URL.stitch(allocator, src, base, .{});
302-
defer allocator.free(result);
303-
try testing.expectString("https://lightpanda.io/xyz/abc/123", result);
304-
}
263+
const cases = [_]Case{
264+
.{
265+
.base = "https://lightpanda.io/xyz/abc/123",
266+
.path = "something.js",
267+
.expected = "https://lightpanda.io/xyz/abc/something.js",
268+
},
269+
.{
270+
.base = "https://lightpanda.io/xyz/abc/123",
271+
.path = "/something.js",
272+
.expected = "https://lightpanda.io/something.js",
273+
},
274+
.{
275+
.base = "https://lightpanda.io/",
276+
.path = "something.js",
277+
.expected = "https://lightpanda.io/something.js",
278+
},
279+
.{
280+
.base = "https://lightpanda.io/",
281+
.path = "/something.js",
282+
.expected = "https://lightpanda.io/something.js",
283+
},
284+
.{
285+
.base = "https://lightpanda.io",
286+
.path = "something.js",
287+
.expected = "https://lightpanda.io/something.js",
288+
},
289+
.{
290+
.base = "https://lightpanda.io",
291+
.path = "abc/something.js",
292+
.expected = "https://lightpanda.io/abc/something.js",
293+
},
294+
.{
295+
.base = "https://lightpanda.io/nested",
296+
.path = "abc/something.js",
297+
.expected = "https://lightpanda.io/abc/something.js",
298+
},
299+
.{
300+
.base = "https://lightpanda.io/nested/",
301+
.path = "abc/something.js",
302+
.expected = "https://lightpanda.io/nested/abc/something.js",
303+
},
304+
.{
305+
.base = "https://lightpanda.io/nested/",
306+
.path = "/abc/something.js",
307+
.expected = "https://lightpanda.io/abc/something.js",
308+
},
309+
.{
310+
.base = "https://lightpanda.io/nested/",
311+
.path = "http://www.github.com/lightpanda-io/",
312+
.expected = "http://www.github.com/lightpanda-io/",
313+
},
314+
.{
315+
.base = "https://lightpanda.io/nested/",
316+
.path = "",
317+
.expected = "https://lightpanda.io/nested/",
318+
},
319+
.{
320+
.base = "https://lightpanda.io/abc/aaa",
321+
.path = "./hello/./world",
322+
.expected = "https://lightpanda.io/abc/hello/world",
323+
},
324+
.{
325+
.base = "https://lightpanda.io/abc/aaa/",
326+
.path = "../hello",
327+
.expected = "https://lightpanda.io/abc/hello",
328+
},
329+
.{
330+
.base = "https://lightpanda.io/abc/aaa",
331+
.path = "../hello",
332+
.expected = "https://lightpanda.io/hello",
333+
},
334+
.{
335+
.base = "https://lightpanda.io/abc/aaa/",
336+
.path = "./.././.././hello",
337+
.expected = "https://lightpanda.io/hello",
338+
},
339+
.{
340+
.base = "some/page",
341+
.path = "hello",
342+
.expected = "some/hello",
343+
},
344+
.{
345+
.base = "some/page/",
346+
.path = "hello",
347+
.expected = "some/page/hello",
348+
},
349+
350+
.{
351+
.base = "some/page/other",
352+
.path = ".././hello",
353+
.expected = "some/hello",
354+
},
355+
};
305356

306-
test "URL: Stitching dotslash" {
307-
const allocator = testing.allocator;
357+
for (cases) |case| {
358+
const result = try stitch(testing.arena_allocator, case.path, case.base, .{});
359+
try testing.expectString(case.expected, result);
360+
}
308361

309-
const base = "https://lightpanda.io/hello/";
310-
const src = "./something.js";
311-
const result = try URL.stitch(allocator, src, base, .{});
312-
defer allocator.free(result);
313-
try testing.expectString("https://lightpanda.io/hello/something.js", result);
362+
try testing.expectError(
363+
error.InvalidURL,
364+
stitch(testing.arena_allocator, "../hello", "https://lightpanda.io/", .{}),
365+
);
366+
try testing.expectError(
367+
error.InvalidURL,
368+
stitch(testing.arena_allocator, "../hello", "http://lightpanda.io/", .{}),
369+
);
314370
}
315371

316372
test "URL: concatQueryString" {

0 commit comments

Comments
 (0)