Skip to content

Commit e92da29

Browse files
Merge pull request #48321 from dotnet/main
Merge main into live
2 parents c916456 + 53f26c1 commit e92da29

33 files changed

+1224
-1219
lines changed

docs/standard/base-types/character-classes-in-regular-expressions.md

Lines changed: 508 additions & 508 deletions
Large diffs are not rendered by default.
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
using System.Text.RegularExpressions;
2+
3+
// Run every example in order, printing a blank line between consecutive examples.
Action[] examples =
{
    PositiveCharacterGroup,
    CharacterRange,
    NegativeCharacterGroup,
    AnyCharacterSingleline,
    AnyCharacterMultiline,
    UnicodeCategory,
    NegativeUnicodeCategory,
    WordCharacter,
    NonWordCharacter,
    WhitespaceCharacter,
    NonWhitespaceCharacter,
    DigitCharacter,
    NonDigitCharacter,
    GetUnicodeCategory,
    CharacterClassSubtraction,
};

for (int i = 0; i < examples.Length; i++)
{
    examples[i]();

    // The last example is not followed by a separator line.
    if (i < examples.Length - 1)
    {
        Console.WriteLine();
    }
}
47+
48+
// <PositiveCharacterGroup>
49+
// Prints, wrapped in single quotes, every match of a pattern whose positive
// character group [ae] accepts both the "gray" and "grey" spellings.
static void PositiveCharacterGroup()
{
    string input = "The gray wolf jumped over the grey wall.";
    string pattern = @"gr[ae]y\s\S+?[\s\p{P}]";

    foreach (Match hit in Regex.Matches(input, pattern))
    {
        Console.WriteLine($"'{hit.Value}'");
    }
}
57+
// The example displays the following output:
58+
// 'gray wolf '
59+
// 'grey wall.'
60+
// </PositiveCharacterGroup>
61+
62+
// <CharacterRange>
63+
// Prints each word of the sample sentence that starts with a character in
// the range A-Z, one word per line.
static void CharacterRange()
{
    MatchCollection capitalized = Regex.Matches(
        "A city Albany Zulu maritime Marseilles",
        @"\b[A-Z]\w*\b");

    for (int i = 0; i < capitalized.Count; i++)
    {
        Console.WriteLine(capitalized[i].Value);
    }
}
70+
// The example displays the following output:
71+
// A
72+
// Albany
73+
// Zulu
74+
// Marseilles
75+
// </CharacterRange>
76+
77+
// <NegativeCharacterGroup>
78+
// Prints every word that begins with "th" whose third character is anything
// except "o" (the negative character group [^o]).
static void NegativeCharacterGroup()
{
    Regex thNotFollowedByO = new Regex(@"\bth[^o]\w+\b");
    string input = "thought thing though them through thus thorough this";

    foreach (Match word in thNotFollowedByO.Matches(input))
    {
        Console.WriteLine(word.Value);
    }
}
85+
// The example displays the following output:
86+
// thing
87+
// them
88+
// through
89+
// thus
90+
// this
91+
// </NegativeCharacterGroup>
92+
93+
// <AnyCharacterSingleline>
94+
// Prints each match of a pattern that uses "." to consume any run of
// characters ending in a sentence-punctuation mark followed by white space
// or the end of the string.
static void AnyCharacterSingleline()
{
    string input = "this. what: is? go, thing.";
    string pattern = @"\b.*[.?!;:](\s|\z)";

    MatchCollection sentences = Regex.Matches(input, pattern);
    foreach (Match sentence in sentences)
    {
        Console.WriteLine(sentence.Value);
    }
}
101+
// The example displays the following output:
102+
// this. what: is? go, thing.
103+
// </AnyCharacterSingleline>
104+
105+
// <AnyCharacterMultiline>
106+
// Matches "^.+" against a two-line string twice, first with default options
// and then with RegexOptions.Singleline, and prints the escaped matches so
// that line-break characters are visible. A blank line separates the passes.
static void AnyCharacterMultiline()
{
    string input = "This is one line and" + Environment.NewLine + "this is the second.";
    string pattern = "^.+";

    // First pass: default options.
    MatchCollection defaultMatches = Regex.Matches(input, pattern);
    foreach (Match m in defaultMatches)
    {
        Console.WriteLine(Regex.Escape(m.Value));
    }

    Console.WriteLine();

    // Second pass: Singleline, so "." also matches the newline character.
    MatchCollection singlelineMatches = Regex.Matches(input, pattern, RegexOptions.Singleline);
    foreach (Match m in singlelineMatches)
    {
        Console.WriteLine(Regex.Escape(m.Value));
    }
}
117+
// The example displays the following output on Windows, where Environment.NewLine is "\r\n" (on Unix-like systems the "\r" does not appear):
118+
// This\ is\ one\ line\ and\r
119+
//
120+
// This\ is\ one\ line\ and\r\nthis\ is\ the\ second\.
121+
// </AnyCharacterMultiline>
122+
123+
// <UnicodeCategory>
124+
// Reports whether the input consists of Greek-block words, a dash
// punctuation character, a white-space character, and then Basic Latin words.
static void UnicodeCategory()
{
    string input = "Ελληνική Γλώσσα - Greek Language";
    string pattern = @"\b(\p{IsGreek}+(\s)?)+\p{Pd}\s(\p{IsBasicLatin}+(\s)?)+";

    bool isMatch = Regex.IsMatch(input, pattern);
    Console.WriteLine(isMatch); // Displays True.
}
131+
// </UnicodeCategory>
132+
133+
// <NegativeUnicodeCategory>
134+
// For each currency string, prints the first run of characters that are not
// in the Unicode "Symbol, Currency" (Sc) category.
static void NegativeUnicodeCategory()
{
    Regex nonCurrencyRun = new Regex(@"(\P{Sc})+");
    string[] values = { "$164,091.78", "£1,073,142.68", "73¢", "€120" };

    for (int i = 0; i < values.Length; i++)
    {
        Match amount = nonCurrencyRun.Match(values[i]);
        Console.WriteLine(amount.Value);
    }
}
142+
// The example displays the following output:
143+
// 164,091.78
144+
// 1,073,142.68
145+
// 73
146+
// 120
147+
// </NegativeUnicodeCategory>
148+
149+
// <WordCharacter>
150+
// For each sample word, reports the first pair of identical adjacent word
// characters and its index, or states that the word has none.
static void WordCharacter()
{
    string[] words = { "trellis", "seer", "latter", "summer",
                       "hoarse", "lesser", "aardvark", "stunned" };
    string pattern = @"(\w)\1";

    foreach (string word in words)
    {
        Match doubled = Regex.Match(word, pattern);
        string report = doubled.Success
            ? $"'{doubled.Value}' found in '{word}' at position {doubled.Index}."
            : $"No double characters in '{word}'.";
        Console.WriteLine(report);
    }
}
164+
// The example displays the following output:
165+
// 'll' found in 'trellis' at position 3.
166+
// 'ee' found in 'seer' at position 1.
167+
// 'tt' found in 'latter' at position 2.
168+
// 'mm' found in 'summer' at position 2.
169+
// No double characters in 'hoarse'.
170+
// 'ss' found in 'lesser' at position 2.
171+
// 'aa' found in 'aardvark' at position 0.
172+
// 'nn' found in 'stunned' at position 3.
173+
// </WordCharacter>
174+
175+
// <NonWordCharacter>
176+
// For each word followed by one or two non-word characters, prints the whole
// match and then lists every captured non-word character together with its
// UTF-16 code unit as four hexadecimal digits.
static void NonWordCharacter()
{
    string input = "The old, grey mare slowly walked across the narrow, green pasture.";
    string pattern = @"\b(\w+)(\W){1,2}";

    foreach (Match match in Regex.Matches(input, pattern))
    {
        Console.WriteLine(match.Value);

        // Group 2 sits under a quantifier, so its Captures collection holds
        // one entry per non-word character consumed by this match.
        IEnumerable<string> described = match.Groups[2].Captures
            .Select(capture => $@"'{capture.Value}' (\u{(ushort)capture.Value[0]:X4})");

        Console.Write(" Non-word character(s):");
        Console.Write(string.Join(", ", described));
        Console.WriteLine();
    }
}
192+
// The example displays the following output:
193+
// The
194+
// Non-word character(s):' ' (\u0020)
195+
// old,
196+
// Non-word character(s):',' (\u002C), ' ' (\u0020)
197+
// grey
198+
// Non-word character(s):' ' (\u0020)
199+
// mare
200+
// Non-word character(s):' ' (\u0020)
201+
// slowly
202+
// Non-word character(s):' ' (\u0020)
203+
// walked
204+
// Non-word character(s):' ' (\u0020)
205+
// across
206+
// Non-word character(s):' ' (\u0020)
207+
// the
208+
// Non-word character(s):' ' (\u0020)
209+
// narrow,
210+
// Non-word character(s):',' (\u002C), ' ' (\u0020)
211+
// green
212+
// Non-word character(s):' ' (\u0020)
213+
// pasture.
214+
// Non-word character(s):'.' (\u002E)
215+
// </NonWordCharacter>
216+
217+
// <WhitespaceCharacter>
218+
// Prints each word ending in "s" or "es" that is followed by a white-space
// character or by the end of the input.
static void WhitespaceCharacter()
{
    string input = "matches stores stops leave leaves";
    Regex pluralWord = new Regex(@"\b\w+(e)?s(\s|$)");

    MatchCollection plurals = pluralWord.Matches(input);
    for (int i = 0; i < plurals.Count; i++)
    {
        Console.WriteLine(plurals[i].Value);
    }
}
225+
// The example displays the following output:
226+
// matches
227+
// stores
228+
// stops
229+
// leaves
230+
// </WhitespaceCharacter>
231+
232+
// <NonWhitespaceCharacter>
233+
// Splits the sample text into runs of non-white-space characters and prints
// each run (capture group 1) on its own line.
static void NonWhitespaceCharacter()
{
    string input = "This is the first sentence of the first paragraph. " +
                   "This is the second sentence.\n" +
                   "This is the only sentence of the second paragraph.";
    string pattern = @"\b(\S+)\s?";

    MatchCollection tokens = Regex.Matches(input, pattern);
    for (int i = 0; i < tokens.Count; i++)
    {
        // Group 1 excludes the optional trailing white-space character.
        Console.WriteLine(tokens[i].Groups[1].Value);
    }
}
242+
// The example displays the following output:
243+
// This
244+
// is
245+
// the
246+
// first
247+
// sentence
248+
// of
249+
// the
250+
// first
251+
// paragraph.
252+
// This
253+
// is
254+
// the
255+
// second
256+
// sentence.
257+
// This
258+
// is
259+
// the
260+
// only
261+
// sentence
262+
// of
263+
// the
264+
// second
265+
// paragraph.
266+
// </NonWhitespaceCharacter>
267+
268+
// <DigitCharacter>
269+
// Tests each sample string against a telephone-number pattern (an optional
// three-digit area code followed by a ddd-dddd local number) and prints
// whether it matched.
static void DigitCharacter()
{
    Regex phoneNumber = new Regex(@"^(\(?\d{3}\)?[\s-])?\d{3}-\d{4}$");
    string[] inputs = { "111 111-1111", "222-2222", "222 333-444",
                        "(212) 111-1111", "111-AB1-1111",
                        "212-111-1111", "01 999-9999" };

    foreach (string candidate in inputs)
    {
        string verdict = phoneNumber.IsMatch(candidate) ? "matched" : "match failed";
        Console.WriteLine($"{candidate}: {verdict}");
    }
}
284+
// The example displays the following output:
285+
// 111 111-1111: matched
286+
// 222-2222: matched
287+
// 222 333-444: match failed
288+
// (212) 111-1111: matched
289+
// 111-AB1-1111: match failed
290+
// 212-111-1111: matched
291+
// 01 999-9999: match failed
292+
// </DigitCharacter>
293+
294+
// <NonDigitCharacter>
295+
// Tests each sample code against a pattern requiring one non-digit, then one
// to five digits, then any number of trailing non-digits, and prints whether
// it matched.
static void NonDigitCharacter()
{
    string[] inputs = { "A1039C", "AA0001", "C18A", "Y938518" };
    string pattern = @"^\D\d{1,5}\D*$";

    for (int i = 0; i < inputs.Length; i++)
    {
        string code = inputs[i];
        Console.WriteLine(Regex.IsMatch(code, pattern)
            ? code + ": matched"
            : code + ": match failed");
    }
}
308+
// The example displays the following output:
309+
// A1039C: matched
310+
// AA0001: match failed
311+
// C18A: matched
312+
// Y938518: match failed
313+
// </NonDigitCharacter>
314+
315+
// <GetUnicodeCategory>
316+
// Prints the Unicode general category of several sample characters. Each
// character is passed through Regex.Escape first so that the space and tab
// are shown as visible escape sequences.
static void GetUnicodeCategory()
{
    foreach (char ch in new[] { 'a', 'X', '8', ',', ' ', '\u0009', '!' })
    {
        string display = Regex.Escape(ch.ToString());
        var category = char.GetUnicodeCategory(ch);
        Console.WriteLine($"'{display}': {category}");
    }
}
323+
// The example displays the following output:
324+
// 'a': LowercaseLetter
325+
// 'X': UppercaseLetter
326+
// '8': DecimalDigitNumber
327+
// ',': OtherPunctuation
328+
// '\ ': SpaceSeparator
329+
// '\t': Control
330+
// '!': OtherPunctuation
331+
// </GetUnicodeCategory>
332+
333+
// <CharacterClassSubtraction>
334+
// Prints each input made up entirely of the digits 0-9 with 2, 4, 6 and 8
// removed by character class subtraction; inputs containing an excluded
// digit produce no output.
static void CharacterClassSubtraction()
{
    string pattern = @"^[0-9-[2468]]+$";
    string[] inputs = { "123", "13579753", "3557798", "335599901" };

    IEnumerable<string> accepted = inputs
        .Select(candidate => Regex.Match(candidate, pattern))
        .Where(result => result.Success)
        .Select(result => result.Value);

    foreach (string number in accepted)
    {
        Console.WriteLine(number);
    }
}
346+
// The example displays the following output:
347+
// 13579753
348+
// 335599901
349+
// </CharacterClassSubtraction>
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net9.0</TargetFramework>
6+
<ImplicitUsings>enable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
</PropertyGroup>
9+
10+
</Project>

0 commit comments

Comments
 (0)