Skip to content

Commit e44b60f

Browse files
antonsyndclaude
andcommitted
fix(core): document BCL shadowing, fix StringBuilder allocations
- Add XML doc remarks to Replace/Contains extensions explaining intentional BCL shadowing (instance methods win at runtime; extensions exist for BuiltinRegistry discovery and internal use) - Fix inconsistent System.Text.StringBuilder qualification in Casefold - Add initial capacity hint to Expandtabs and Replace 3-arg StringBuilder - Add Axiom 1 UTF-16 design rationale comments to StringHelpers.Iterate/Reversed Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3848dcc commit e44b60f

3 files changed

Lines changed: 31 additions & 3 deletions

File tree

src/Sharpy.Core/StringExtensions.Interfaces.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@ public static partial class StringExtensions
1111
/// Return true if <paramref name="substring"/> is found within this string.
1212
/// Used for <c>"x" in s</c> codegen.
1313
/// </summary>
14+
/// <remarks>
15+
/// This extension shadows <see cref="string.Contains(string)"/> by design.
16+
/// C# instance methods take precedence over extensions, so generated code calling
17+
/// <c>s.Contains(x)</c> always invokes the BCL method at runtime. This overload
18+
/// exists so that BuiltinRegistry can discover it via reflection and register
19+
/// <c>str.contains</c> for type-checking. The ordinal semantics here match
20+
/// Python's byte-level containment check.
21+
/// </remarks>
1422
public static bool Contains(this string s, string substring)
1523
{
1624
return s.IndexOf(substring, StringComparison.Ordinal) >= 0;

src/Sharpy.Core/StringExtensions.cs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public static string Swapcase(this string s)
116116
/// </summary>
117117
public static string Casefold(this string s)
118118
{
119-
var sb = new System.Text.StringBuilder(s.Length);
119+
var sb = new StringBuilder(s.Length);
120120
foreach (var c in s)
121121
{
122122
sb.Append(CaseFoldChar(c));
@@ -265,7 +265,7 @@ public static string Removesuffix(this string s, string suffix)
265265
/// </summary>
266266
public static string Expandtabs(this string s, int tabsize = 8)
267267
{
268-
var result = new StringBuilder();
268+
var result = new StringBuilder(s.Length);
269269
int column = 0;
270270

271271
foreach (char c in s)
@@ -300,6 +300,16 @@ public static string Expandtabs(this string s, int tabsize = 8)
300300
/// by <paramref name="new_"/>.
301301
/// Python: <c>str.replace(old, new)</c>
302302
/// </summary>
303+
/// <remarks>
304+
/// This extension shadows <see cref="string.Replace(string, string)"/> by design.
305+
/// C# instance methods take precedence over extensions, so generated code calling
306+
/// <c>s.Replace(old, new_)</c> always invokes the BCL method at runtime. This
307+
/// overload exists for two reasons: (1) BuiltinRegistry discovers it via reflection
308+
/// to register <c>str.replace</c> for type-checking, and (2) the 3-arg overload
309+
/// calls it directly (as a static call) when <c>count &lt; 0</c> to get Python
310+
/// empty-string replacement semantics (BCL throws on empty <paramref name="old"/>
311+
/// in netstandard2.0).
312+
/// </remarks>
303313
public static string Replace(this string s, string old, string new_)
304314
{
305315
if (old.Length == 0)
@@ -336,7 +346,8 @@ public static string Replace(this string s, string old, string new_, int count)
336346

337347
if (old.Length == 0)
338348
{
339-
var sb = new StringBuilder();
349+
int insertions = count < s.Length + 1 ? count : s.Length + 1;
350+
var sb = new StringBuilder(new_.Length * insertions + s.Length);
340351
int replacements = 0;
341352
if (replacements < count)
342353
{

src/Sharpy.Core/StringHelpers.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ public static string GetItem(string s, int index)
6666
/// Python iterates strings yielding single-char strings, not chars.
6767
/// Used for <c>for c in s:</c> codegen.
6868
/// </summary>
69+
/// <remarks>
70+
/// Iterates by UTF-16 code unit, not Unicode code point. Surrogate pairs
71+
/// (e.g., emoji) yield two separate single-char strings. This follows
72+
/// Axiom 1 (.NET UTF-16 semantics take precedence over Python code-point iteration).
73+
/// </remarks>
6974
public static IEnumerable<string> Iterate(string s)
7075
{
7176
for (int i = 0; i < s.Length; i++)
@@ -78,6 +83,10 @@ public static IEnumerable<string> Iterate(string s)
7883
/// Yields single-character strings in reverse order.
7984
/// Used for <c>reversed(s)</c> codegen.
8085
/// </summary>
86+
/// <remarks>
87+
/// Iterates by UTF-16 code unit, not Unicode code point. See
88+
/// <see cref="Iterate"/> remarks for Axiom 1 rationale.
89+
/// </remarks>
8190
public static IEnumerable<string> Reversed(string s)
8291
{
8392
for (int i = s.Length - 1; i >= 0; i--)

0 commit comments

Comments
 (0)