Skip to content

Commit 42a2f25

Browse files
MihaZupan and Miha Zupan authored
Make Uri path compression O(n) (#117820)
* Move compress logic to UriHelper
* Add regression test
* Linear impl
* Skip first no-op copy

---------

Co-authored-by: Miha Zupan <[email protected]>
1 parent 9c60dcc commit 42a2f25

File tree

5 files changed

+329
-142
lines changed

5 files changed

+329
-142
lines changed

src/libraries/System.Private.Uri/src/System.Private.Uri.csproj

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,9 @@
66
</PropertyGroup>
77

88
<ItemGroup>
9-
<Compile Include="$(CommonPath)System\Collections\Generic\ArrayBuilder.cs"
10-
Link="Common\System\Collections\Generic\ArrayBuilder.cs" />
11-
<Compile Include="$(CommonPath)System\HexConverter.cs"
12-
Link="Common\System\HexConverter.cs" />
13-
<Compile Include="$(CommonPath)System\Obsoletions.cs"
14-
Link="Common\System\Obsoletions.cs" />
15-
</ItemGroup>
16-
17-
<ItemGroup>
9+
<Compile Include="$(CoreLibSharedDir)System\Collections\Generic\ValueListBuilder.cs" Link="Common\System\Collections\Generic\ValueListBuilder.cs" />
10+
<Compile Include="$(CommonPath)System\HexConverter.cs" Link="Common\System\HexConverter.cs" />
11+
<Compile Include="$(CommonPath)System\Obsoletions.cs" Link="Common\System\Obsoletions.cs" />
1812
<Compile Include="System\DomainNameHelper.cs" />
1913
<Compile Include="System\GenericUriParser.cs" />
2014
<Compile Include="System\IPv4AddressHelper.cs" />

src/libraries/System.Private.Uri/src/System/Uri.cs

Lines changed: 15 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,8 @@ public string[] Segments
950950
}
951951
else
952952
{
953-
ArrayBuilder<string> pathSegments = default;
953+
var pathSegments = new ValueListBuilder<string>(4);
954+
954955
int current = 0;
955956
while (current < path.Length)
956957
{
@@ -959,10 +960,12 @@ public string[] Segments
959960
{
960961
next = path.Length - 1;
961962
}
962-
pathSegments.Add(path.Substring(current, (next - current) + 1));
963+
pathSegments.Append(path.Substring(current, (next - current) + 1));
963964
current = next + 1;
964965
}
965-
segments = pathSegments.ToArray();
966+
967+
segments = pathSegments.AsSpan().ToArray();
968+
pathSegments.Dispose();
966969
}
967970

968971
return segments;
@@ -4438,7 +4441,11 @@ private unsafe void GetCanonicalPath(ref ValueStringBuilder dest, UriFormat form
44384441
if (InFact(Flags.ShouldBeCompressed) && dest.Length - offset > 0)
44394442
{
44404443
// It will also convert back slashes if needed
4441-
dest.Length = offset + Compress(dest.RawChars.Slice(offset, dest.Length - offset), _syntax);
4444+
dest.Length = offset + UriHelper.Compress(
4445+
dest.RawChars.Slice(offset, dest.Length - offset),
4446+
_syntax.InFact(UriSyntaxFlags.ConvertPathSlashes),
4447+
_syntax.InFact(UriSyntaxFlags.CanonicalizeAsFilePath));
4448+
44424449
if (dest[start] == '\\')
44434450
dest[start] = '/';
44444451

@@ -4576,131 +4583,10 @@ private static unsafe void UnescapeOnly(char* pch, int start, ref int end, char
45764583

45774584
private static void Compress(char[] dest, int start, ref int destLength, UriParser syntax)
45784585
{
4579-
destLength = start + Compress(dest.AsSpan(start, destLength - start), syntax);
4580-
}
4581-
4582-
//
4583-
// This will compress any "\" "/../" "/./" "///" "/..../" /XXX.../, etc found in the input
4584-
//
4585-
// The passed syntax controls whether to use aggressive compression or the one specified in RFC 2396
4586-
//
4587-
private static int Compress(Span<char> span, UriParser syntax)
4588-
{
4589-
if (syntax.InFact(UriSyntaxFlags.ConvertPathSlashes))
4590-
{
4591-
span.Replace('\\', '/');
4592-
}
4593-
4594-
int slashCount = 0;
4595-
int lastSlash = 0;
4596-
int dotCount = 0;
4597-
int removeSegments = 0;
4598-
4599-
for (int i = span.Length - 1; i >= 0; i--)
4600-
{
4601-
char ch = span[i];
4602-
4603-
// compress multiple '/' for file URI
4604-
if (ch == '/')
4605-
{
4606-
++slashCount;
4607-
}
4608-
else
4609-
{
4610-
if (slashCount > 1)
4611-
{
4612-
// else preserve repeated slashes
4613-
lastSlash = i + 1;
4614-
}
4615-
slashCount = 0;
4616-
}
4617-
4618-
if (ch == '.')
4619-
{
4620-
++dotCount;
4621-
continue;
4622-
}
4623-
else if (dotCount != 0)
4624-
{
4625-
bool skipSegment = syntax.NotAny(UriSyntaxFlags.CanonicalizeAsFilePath)
4626-
&& (dotCount > 2 || ch != '/');
4627-
4628-
// Cases:
4629-
// /./ = remove this segment
4630-
// /../ = remove this segment, mark next for removal
4631-
// /....x = DO NOT TOUCH, leave as is
4632-
// x.../ = DO NOT TOUCH, leave as is, except for V2 legacy mode
4633-
if (!skipSegment && ch == '/')
4634-
{
4635-
if ((lastSlash == i + dotCount + 1 // "/..../"
4636-
|| (lastSlash == 0 && i + dotCount + 1 == span.Length)) // "/..."
4637-
&& (dotCount <= 2))
4638-
{
4639-
// /./ or /.<eos> or /../ or /..<eos>
4640-
4641-
// span.Remove(i + 1, dotCount + (lastSlash == 0 ? 0 : 1));
4642-
lastSlash = i + 1 + dotCount + (lastSlash == 0 ? 0 : 1);
4643-
span.Slice(lastSlash).CopyTo(span.Slice(i + 1));
4644-
span = span.Slice(0, span.Length - (lastSlash - i - 1));
4645-
4646-
lastSlash = i;
4647-
if (dotCount == 2)
4648-
{
4649-
// We have 2 dots in between like /../ or /..<eos>,
4650-
// Mark next segment for removal and remove this /../ or /..
4651-
++removeSegments;
4652-
}
4653-
dotCount = 0;
4654-
continue;
4655-
}
4656-
}
4657-
// .NET 4.5 no longer removes trailing dots in a path segment x.../ or x...<eos>
4658-
dotCount = 0;
4659-
4660-
// Here all other cases go such as
4661-
// x.[..]y or /.[..]x or (/x.[...][/] && removeSegments !=0)
4662-
}
4663-
4664-
// Now we may want to remove a segment because of previous /../
4665-
if (ch == '/')
4666-
{
4667-
if (removeSegments != 0)
4668-
{
4669-
--removeSegments;
4670-
4671-
span.Slice(lastSlash + 1).CopyTo(span.Slice(i + 1));
4672-
span = span.Slice(0, span.Length - (lastSlash - i));
4673-
}
4674-
lastSlash = i;
4675-
}
4676-
}
4677-
4678-
if (span.Length != 0 && syntax.InFact(UriSyntaxFlags.CanonicalizeAsFilePath))
4679-
{
4680-
if (slashCount <= 1)
4681-
{
4682-
if (removeSegments != 0 && span[0] != '/')
4683-
{
4684-
//remove first not rooted segment
4685-
lastSlash++;
4686-
span.Slice(lastSlash).CopyTo(span);
4687-
return span.Length - lastSlash;
4688-
}
4689-
else if (dotCount != 0)
4690-
{
4691-
// If final string starts with a segment looking like .[...]/ or .[...]<eos>
4692-
// then we remove this first segment
4693-
if (lastSlash == dotCount || (lastSlash == 0 && dotCount == span.Length))
4694-
{
4695-
dotCount += lastSlash == 0 ? 0 : 1;
4696-
span.Slice(dotCount).CopyTo(span);
4697-
return span.Length - dotCount;
4698-
}
4699-
}
4700-
}
4701-
}
4702-
4703-
return span.Length;
4586+
destLength = start + UriHelper.Compress(
4587+
dest.AsSpan(start, destLength - start),
4588+
syntax.InFact(UriSyntaxFlags.ConvertPathSlashes),
4589+
syntax.InFact(UriSyntaxFlags.CanonicalizeAsFilePath));
47044590
}
47054591

47064592
//

src/libraries/System.Private.Uri/src/System/UriHelper.cs

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System.Buffers;
5+
using System.Collections.Generic;
56
using System.Diagnostics;
67
using System.Diagnostics.CodeAnalysis;
78
using System.Runtime.InteropServices;
@@ -657,5 +658,162 @@ public static bool StripBidiControlCharacters(ReadOnlySpan<char> strToClean, [No
657658
});
658659
return true;
659660
}
661+
662+
// Compresses a path in place and returns the length of the compressed result.
//
// This will compress any "\" "/../" "/./" "///" "/..../" /XXX.../, etc found in the input
//
// The passed options control whether to use aggressive compression or the one specified in RFC 2396
//
//   span                   - the path characters. Modified in place; characters past the
//                            returned length are not cleared.
//   convertPathSlashes     - when true, every '\' is replaced with '/' before compressing.
//   canonicalizeAsFilePath - when true, a leading non-rooted segment (or a leading dots-only
//                            segment) may additionally be removed after the main scan.
//
// The input is scanned once from the end towards the start. Instead of shifting the tail of
// the buffer every time a segment is dropped, the (Start, Length) of each dropped range is
// recorded and all surviving ranges are merged in a single front-to-back pass at the end.
public static int Compress(Span<char> span, bool convertPathSlashes, bool canonicalizeAsFilePath)
{
    if (span.IsEmpty)
    {
        return 0;
    }

    if (convertPathSlashes)
    {
        span.Replace('\\', '/');
    }

    // Ranges to drop, recorded while scanning backwards (so later entries start earlier in the span).
    ValueListBuilder<(int Start, int Length)> removedSegments = default;

    int slashCount = 0;     // length of the run of '/' currently being scanned
    int lastSlash = 0;      // index of the most recently seen segment boundary (0 = none seen yet)
    int dotCount = 0;       // length of the run of '.' currently being scanned
    int removeSegments = 0; // number of pending segments to drop because of "/../"

    for (int i = span.Length - 1; i >= 0; i--)
    {
        char ch = span[i];

        // compress multiple '/' for file URI
        if (ch == '/')
        {
            ++slashCount;
        }
        else
        {
            if (slashCount > 1)
            {
                // else preserve repeated slashes
                lastSlash = i + 1;
            }
            slashCount = 0;
        }

        if (ch == '.')
        {
            ++dotCount;
            continue;
        }
        else if (dotCount != 0)
        {
            // Only syntaxes that are NOT canonicalized as file paths leave "/..../" and "x.../" untouched.
            // The flag must be negated here to match the "DO NOT TOUCH" cases listed below.
            // (The dotCount <= 2 check further down currently yields the same result either way,
            // but the condition should still state the intended rule.)
            bool skipSegment = !canonicalizeAsFilePath && (dotCount > 2 || ch != '/');

            // Cases:
            // /./  = remove this segment
            // /../ = remove this segment, mark next for removal
            // /....x = DO NOT TOUCH, leave as is
            // x.../ = DO NOT TOUCH, leave as is, except for V2 legacy mode
            if (!skipSegment && ch == '/')
            {
                if ((lastSlash == i + dotCount + 1 // "/..../"
                        || (lastSlash == 0 && i + dotCount + 1 == span.Length)) // "/..."
                    && (dotCount <= 2))
                {
                    // /./ or /.<eos> or /../ or /..<eos>
                    // Drop the dots, plus the trailing '/' when there is one.
                    removedSegments.Append((i + 1, dotCount + (lastSlash == 0 ? 0 : 1)));

                    lastSlash = i;
                    if (dotCount == 2)
                    {
                        // We have 2 dots in between like /../ or /..<eos>,
                        // Mark next segment for removal and remove this /../ or /..
                        ++removeSegments;
                    }
                    dotCount = 0;
                    continue;
                }
            }
            // .NET 4.5 no longer removes trailing dots in a path segment x.../ or x...<eos>
            dotCount = 0;

            // Here all other cases go such as
            // x.[..]y or /.[..]x or (/x.[...][/] && removeSegments !=0)
        }

        // Now we may want to remove a segment because of previous /../
        if (ch == '/')
        {
            if (removeSegments != 0)
            {
                removeSegments--;

                // Drop everything after this '/' up to and including the previous boundary.
                removedSegments.Append((i + 1, lastSlash - i));
            }

            lastSlash = i;
        }
    }

    if (canonicalizeAsFilePath)
    {
        if (slashCount <= 1)
        {
            if (removeSegments != 0 && span[0] != '/')
            {
                // remove first not rooted segment
                removedSegments.Append((0, lastSlash + 1));
            }
            else if (dotCount != 0)
            {
                // If final string starts with a segment looking like .[...]/ or .[...]<eos>
                // then we remove this first segment
                if (lastSlash == dotCount || (lastSlash == 0 && dotCount == span.Length))
                {
                    removedSegments.Append((0, dotCount + (lastSlash == 0 ? 0 : 1)));
                }
            }
        }
    }

    if (removedSegments.Length == 0)
    {
        // Nothing to remove; the input is already in its compressed form.
        return span.Length;
    }

    // Merge any remaining segments.
    // Write and read offsets are only ever the same for the first segment.
    // Copying the first section would no-op anyway, so we start with the first removed segment.
    int writeOffset = removedSegments[^1].Start;
    int readOffset = writeOffset;

    // Walk the recorded ranges from last-appended to first-appended, i.e. in ascending Start order.
    for (int i = removedSegments.Length - 1; i >= 0; i--)
    {
        (int start, int length) = removedSegments[i];

        Debug.Assert(start >= readOffset && length > 0 && start + length <= span.Length);

        if (readOffset != start)
        {
            Debug.Assert(readOffset > writeOffset);

            // Move the surviving characters between the previous removed range and this one.
            int segmentLength = start - readOffset;
            span.Slice(readOffset, segmentLength).CopyTo(span.Slice(writeOffset));
            writeOffset += segmentLength;
        }

        // Skip over the removed range.
        readOffset = start + length;
    }

    if (readOffset != span.Length)
    {
        Debug.Assert(readOffset > writeOffset);

        // Move whatever survives after the last removed range.
        span.Slice(readOffset).CopyTo(span.Slice(writeOffset));
        writeOffset += span.Length - readOffset;
    }

    removedSegments.Dispose();
    return writeOffset;
}
660818
}
661819
}

0 commit comments

Comments
 (0)