forked from UglyToad/PdfPig
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNameTokenizer.cs
More file actions
123 lines (101 loc) · 3.63 KB
/
NameTokenizer.cs
File metadata and controls
123 lines (101 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
namespace UglyToad.PdfPig.Tokenization
{
using System;
using System.Text;
using Core;
using Tokens;
#if NET
using System.Text.Unicode;
#endif
/// <summary>
/// Tokenizes PDF name objects: a leading '/' followed by the bytes of the name, where the
/// sequence "#XX" (two hexadecimal digits) encodes a single byte.
/// </summary>
internal sealed class NameTokenizer : ITokenizer
{
#if NET
    static NameTokenizer()
    {
        // Register the code-pages provider so that the "windows-1252" lookup used as the
        // non-UTF-8 fallback in TryTokenize can be resolved on this runtime.
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    }
#endif

    /// <summary>
    /// Always <c>false</c> for this tokenizer: the terminating byte is only peeked, never consumed.
    /// </summary>
    public bool ReadsNextByte => false;

    /// <summary>
    /// Attempts to read a name token starting at <paramref name="currentByte"/>.
    /// </summary>
    /// <param name="currentByte">The byte the reader is positioned on; must be '/' for a name.</param>
    /// <param name="inputBytes">The input to peek and advance while reading the name.</param>
    /// <param name="token">The resulting name token, or <c>null</c> when no name starts here.</param>
    /// <returns><c>true</c> if a name token was produced; <c>false</c> if <paramref name="currentByte"/> is not '/'.</returns>
    public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
    {
        token = null;

        if (currentByte != '/')
        {
            return false;
        }

        using var bytes = new ArrayPoolBufferWriter<byte>();

        // Escape state: set after a '#' until two hex digits were read or the escape is abandoned.
        bool escapeActive = false;
        bool hasFirstDigit = false;
        byte firstDigit = 0;

        while (inputBytes.Peek() is { } b)
        {
            if (escapeActive)
            {
                if (ReadHelper.IsHex((char)b))
                {
                    if (!hasFirstDigit)
                    {
                        firstDigit = b;
                        hasFirstDigit = true;
                    }
                    else
                    {
                        bytes.Write((byte)(HexDigitValue(firstDigit) * 16 + HexDigitValue(b)));
                        escapeActive = false;
                        hasFirstDigit = false;
                    }

                    inputBytes.MoveNext();
                    continue;
                }

                // Malformed escape such as "#m", "#A " or "#A#41": keep what was read as literal
                // text, then fall through so the current byte is handled like any other byte
                // (it may end the name, start a new escape, or be ordinary content).
                WriteIncompleteEscape(bytes, hasFirstDigit, firstDigit);
                escapeActive = false;
                hasFirstDigit = false;
            }

            if (b == '#')
            {
                escapeActive = true;
            }
            else if (ReadHelper.IsEndOfName(b))
            {
                // Leave the delimiter unconsumed for the next tokenizer.
                break;
            }
            else
            {
                bytes.Write(b);
            }

            inputBytes.MoveNext();
        }

        if (escapeActive)
        {
            // Input ended in the middle of an escape; treat it the same way as an escape that
            // was cut short by a delimiter instead of silently dropping the bytes.
            WriteIncompleteEscape(bytes, hasFirstDigit, firstDigit);
        }

#if NET8_0_OR_GREATER
        var byteArray = bytes.WrittenSpan;
        bool isValidUtf8 = Utf8.IsValid(byteArray);
#else
        var byteArray = bytes.WrittenSpan.ToArray();
        bool isValidUtf8 = ReadHelper.IsValidUtf8(byteArray);
#endif

        // Prefer UTF-8; fall back to windows-1252 when the bytes are not valid UTF-8.
        var str = isValidUtf8
            ? Encoding.UTF8.GetString(byteArray)
            : Encoding.GetEncoding("windows-1252").GetString(byteArray);

        token = NameToken.Create(str);

        return true;
    }

    /// <summary>
    /// Converts an ASCII hex digit (already validated by the caller) to its numeric value.
    /// </summary>
    private static int HexDigitValue(byte digit)
    {
        // For letters the low nibble of 'A'/'a' is 1, so adding 9 yields 10..15.
        return digit <= '9' ? digit - '0' : (digit & 0xF) + 9;
    }

    /// <summary>
    /// Writes an abandoned escape sequence literally: the '#' and, if one was read, its first hex digit.
    /// </summary>
    private static void WriteIncompleteEscape(ArrayPoolBufferWriter<byte> bytes, bool hasFirstDigit, byte firstDigit)
    {
        bytes.Write((byte)'#');

        if (hasFirstDigit)
        {
            bytes.Write(firstDigit);
        }
    }
}
}