diff --git a/.github/workflows/dotnet.yml b/.github/workflows/dotnet.yml index 235f1ef..ba01b9f 100644 --- a/.github/workflows/dotnet.yml +++ b/.github/workflows/dotnet.yml @@ -15,11 +15,11 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v6 - name: Setup .NET - uses: actions/setup-dotnet@v3 + uses: actions/setup-dotnet@v5 with: - dotnet-version: 9.0.x + dotnet-version: 10 - name: Restore dependencies run: dotnet restore - name: Build diff --git a/RobotsTxt/RobotsTxtParser.cs b/RobotsTxt/RobotsTxtParser.cs index 5fb8418..65ec04a 100644 --- a/RobotsTxt/RobotsTxtParser.cs +++ b/RobotsTxt/RobotsTxtParser.cs @@ -1,3 +1,5 @@ +using System.Buffers; + namespace RobotsTxt; public class RobotsTxtParser(byte[] robotsBody, IRobotsParseHandler handler) @@ -74,8 +76,9 @@ private void ParseAndEmitLine(int currentLine, ReadOnlySpan line) key.Parse(stringKey); if (NeedEscapeValueForKey(key)) { - var escapedValue = MaybeEscapePattern(value); + var escapedValue = MaybeEscapePattern(value, out var dst); EmitKeyValueToHandler(currentLine, key, escapedValue); + if (dst != null) ArrayPool.Shared.Return(dst); } else { @@ -107,7 +110,7 @@ private void EmitKeyValueToHandler(int currentLine, ParsedRobotsKey key, ReadOnl } } - public static ReadOnlySpan MaybeEscapePattern(ReadOnlySpan src) + public static ReadOnlySpan MaybeEscapePattern(ReadOnlySpan src, out byte[]? dst) { var numToEscape = 0; var needCapitalize = false; @@ -131,19 +134,22 @@ public static ReadOnlySpan MaybeEscapePattern(ReadOnlySpan src) if (numToEscape == 0 && !needCapitalize) { + dst = null; return src; } - var dst = new byte[numToEscape * 2 + src.Length]; + dst = ArrayPool.Shared.Rent(numToEscape * 2 + src.Length); + var j = 0; for (var i = 0; i < src.Length; i++) { var c = src[i]; if (c == '%' && i + 2 < src.Length && src[i + 1].IsXDigit() && src[i + 2].IsXDigit()) { - dst[j++] = src[i++]; - dst[j++] = src[i++].ToUpper(); - dst[j++] = src[i++].ToUpper(); + dst[j++] = (byte)'%'; + dst[j++] = src[i + 1].ToUpper(); + dst[j++] = src[i + 2].ToUpper(); + i += 2; } else if (c >= 0x80) { @@ -157,7 +163,7 @@ public static ReadOnlySpan MaybeEscapePattern(ReadOnlySpan src) } } - return dst; + return dst.AsSpan(0, j); } private static bool NeedEscapeValueForKey(ParsedRobotsKey key) diff --git a/TestRobotsTxt/GoogleTests.cs b/TestRobotsTxt/GoogleTests.cs index 7422375..1feade9 100644 --- a/TestRobotsTxt/GoogleTests.cs +++ b/TestRobotsTxt/GoogleTests.cs @@ -1,3 +1,4 @@ +using System.Buffers; using System.Diagnostics; using System.Text; using RobotsTxt; @@ -1011,11 +1012,15 @@ public void TestGetPathParamsQuery(string url, string expected) [InlineData("/a/b/c", "/a/b/c")] [InlineData("á", "%C3%A1")] [InlineData("%aa", "%AA")] + [InlineData("%ab%c", "%AB%c")] + [InlineData("test%", "test%")] + [InlineData("%a", "%a")] public void TestMaybeEscapePattern(string url, string expected) { var actual = - Encoding.ASCII.GetString(RobotsTxtParser.MaybeEscapePattern(Encoding.UTF8.GetBytes(url)).ToArray()); + Encoding.ASCII.GetString(RobotsTxtParser.MaybeEscapePattern(Encoding.UTF8.GetBytes(url), out var dst).ToArray()); Assert.Equal(expected, actual); + if (dst != null) ArrayPool.Shared.Return(dst); } } } diff --git a/TestRobotsTxt/TestRobotsTxtParser.cs b/TestRobotsTxt/TestRobotsTxtParser.cs index bae5ff6..4074243 100644 --- a/TestRobotsTxt/TestRobotsTxtParser.cs +++ b/TestRobotsTxt/TestRobotsTxtParser.cs @@ -1,5 +1,8 @@ +using System.Buffers; using System.Text; + using RobotsTxt; + using Xunit; namespace TestRobotsTxt @@ -56,8 +59,9 @@ public void TestGetKeyAndValueFrom(string line, bool rc, string expectedKey, str [InlineData("é", "%C3%A9")] public void TestMaybeEscapePattern(string src, string expected) { - var actual = RobotsTxtParser.MaybeEscapePattern(Encoding.UTF8.GetBytes(src)); + var actual = RobotsTxtParser.MaybeEscapePattern(Encoding.UTF8.GetBytes(src), out var dst); Assert.Equal(expected, Encoding.UTF8.GetString(actual.ToArray())); + if (dst != null) ArrayPool.Shared.Return(dst); } } }