Skip to content

Commit 2d73b21

Browse files
committed
StringOptimization: optimize C string literals.
Replace a String initializer followed by String.utf8CString with a (UTF8 encoded) string literal. This code pattern is generated when calling C functions with string literals, e.g. puts("hello!"). rdar://74941849
1 parent 5be350a commit 2d73b21

File tree

3 files changed

+111
-0
lines changed

3 files changed

+111
-0
lines changed

lib/SILOptimizer/Transforms/StringOptimization.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ namespace {
3939
/// * Replaces x.append(y) with x = x + y if x and y are constant strings.
4040
/// * Replaces _typeName(T.self) with a constant string if T is statically
4141
/// known.
42+
/// * Replaces String(literal).utf8CString with the string literal itself.
4243
///
4344
/// This pass must run on high-level SIL, where semantic calls are still in
4445
/// place.
@@ -92,6 +93,7 @@ class StringOptimization {
9293
llvm::DenseMap<SILValue, SILValue> &storedStrings);
9394
bool optimizeStringConcat(ApplyInst *concatCall);
9495
bool optimizeTypeName(ApplyInst *typeNameCall);
96+
bool optimizeGetCString(ApplyInst *getCStringCall);
9597

9698
static ApplyInst *isSemanticCall(SILInstruction *inst, StringRef attr,
9799
unsigned numArgs);
@@ -156,6 +158,13 @@ bool StringOptimization::optimizeBlock(SILBasicBlock &block) {
156158
continue;
157159
}
158160
}
161+
if (ApplyInst *getCString = isSemanticCall(inst,
162+
semantics::STRING_GET_UTF8_CSTRING, 1)) {
163+
if (optimizeGetCString(getCString)) {
164+
changed = true;
165+
continue;
166+
}
167+
}
159168
// Remove items from storedStrings if inst overwrites (or potentially
160169
// overwrites) a stored String in an identifiable object.
161170
invalidateModifiedObjects(inst, storedStrings);
@@ -328,6 +337,50 @@ bool StringOptimization::optimizeTypeName(ApplyInst *typeNameCall) {
328337
return true;
329338
}
330339

340+
/// Replaces a String initializer followed by String.utf8CString with a
341+
/// (UTF8 encoded) string literal.
342+
///
343+
/// Note that string literals are always generated with a trailing 0-byte.
///
/// Starting at \p getCStringCall, the users of the call are followed through
/// struct_extract instructions. Every ref_tail_addr reached this way is
/// replaced by a pointer_to_address of a single string_literal holding the
/// constant string. Users of any other kind are left untouched. Neither the
/// call nor the replaced ref_tail_addr instructions are erased here.
///
/// \p getCStringCall is the apply of String.utf8CString; its first argument
/// is the String being converted.
///
/// \returns true if at least one ref_tail_addr was replaced, false if the
/// argument is not a constant string or no ref_tail_addr was found.
344+
bool StringOptimization::optimizeGetCString(ApplyInst *getCStringCall) {
345+
// Is this a String.utf8CString of a literal String?
346+
StringInfo stringInfo = getStringInfo(getCStringCall->getArgument(0));
347+
if (!stringInfo.isConstant())
348+
return false;
349+
350+
// Created lazily, only once the first ref_tail_addr is found, and then
// shared by all replacements.
StringLiteralInst *literal = nullptr;
351+
bool changed = false;
352+
// Instructions still to be visited, seeded with the call itself.
SmallVector<SILInstruction *, 16> workList;
353+
workList.push_back(getCStringCall);
354+
355+
// String.utf8CString returns an array of Int8. Search for ref_tail_addr of
356+
// the array buffer.
357+
while (!workList.empty()) {
358+
SILInstruction *inst = workList.pop_back_val();
359+
// Look through struct_extract instructions, which extract the buffer from
// the array. The call itself is handled the same way: all of its users are
// queued for inspection.
360+
if (isa<StructExtractInst>(inst) || inst == getCStringCall) {
361+
for (Operand *use : cast<SingleValueInstruction>(inst)->getUses()) {
362+
workList.push_back(use->getUser());
363+
}
364+
continue;
365+
}
366+
if (auto *rta = dyn_cast<RefTailAddrInst>(inst)) {
367+
// Replace the ref_tail_addr with a pointer_to_address of the string
368+
// literal.
369+
if (!literal) {
370+
// Build the literal if we don't have one, yet. The builder is positioned
// at the utf8CString call and reuses the call's location.
371+
SILBuilder builder(getCStringCall);
372+
literal = builder.createStringLiteral(getCStringCall->getLoc(),
373+
stringInfo.str, StringLiteralInst::Encoding::UTF8);
374+
}
375+
// The new address is created at the ref_tail_addr with the same result
// type, so every existing user can be redirected to it unchanged.
SILBuilder builder(rta);
376+
auto *strAddr = builder.createPointerToAddress(rta->getLoc(), literal,
377+
rta->getType(), /*isStrict*/ false);
378+
rta->replaceAllUsesWith(strAddr);
379+
changed = true;
380+
}
381+
}
382+
return changed;
383+
}
331384

332385
/// Returns the apply instruction if \p inst is a call of a function which has
333386
/// a semantic attribute \p attr and exactly \p numArgs arguments.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// RUN: %target-build-swift -O %s -module-name=test -emit-sil | %FileCheck %s
2+
3+
// RUN: %empty-directory(%t)
4+
// RUN: %target-build-swift -O -module-name=test %s -o %t/a.out
5+
// RUN: %target-run %t/a.out | %FileCheck %s -check-prefix=CHECK-OUTPUT
6+
7+
// REQUIRES: executable_test,swift_stdlib_no_asserts
8+
9+
#if canImport(Darwin)
10+
import Darwin
11+
#elseif canImport(Glibc)
12+
import Glibc
13+
#elseif os(Windows)
14+
import CRT
15+
#else
16+
#error("Unsupported platform")
17+
#endif
18+
19+
// CHECK-LABEL: sil [noinline] @$s4test0A26StringConstantForCFunctionyyF
20+
// CHECK-NOT: apply
21+
// CHECK: [[L:%[0-9]+]] = string_literal utf8 "Hello world!"
22+
// CHECK-NOT: apply
23+
// CHECK: [[P:%[0-9]+]] = struct $UnsafePointer<Int8> ([[L]] : $Builtin.RawPointer)
24+
// CHECK-NOT: apply
25+
// CHECK: [[O:%[0-9]+]] = enum $Optional<UnsafePointer<Int8>>, #Optional.some!enumelt, [[P]]
26+
// CHECK-NOT: apply
27+
// CHECK: [[F:%[0-9]+]] = function_ref @puts
28+
// CHECK: apply [[F]]([[O]])
29+
// CHECK: } // end sil function '$s4test0A26StringConstantForCFunctionyyF'
30+
@inline(never)
31+
public func testStringConstantForCFunction() {
32+
puts("Hello " + "world!")
33+
}
34+
35+
// CHECK-OUTPUT: Hello world!
36+
testStringConstantForCFunction()

test/SILOptimizer/string_optimization.sil

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ sil [readonly] [_semantics "string.init_empty"] @empty_string : $@convention(met
1111
sil [_semantics "string.init_empty"] @empty_string_with_capacity : $@convention(method) (Int, @thin String.Type) -> @owned String
1212
sil [_semantics "string.append"] @string_append : $@convention(method) (@guaranteed String, @inout String) -> ()
1313
sil [_semantics "string.concat"] @string_concat : $@convention(method) (@guaranteed String, @guaranteed String, @thin String.Type) -> @owned String
14+
sil [readonly] [_semantics "string.getUTF8CString"] @string_getUTF8CString : $@convention(method) (@guaranteed String) -> @owned ContiguousArray<Int8>
1415

1516
// CHECK-LABEL: sil @append_to_empty_string
1617
// CHECK: [[S:%[0-9]+]] = alloc_stack $String
@@ -232,3 +233,24 @@ bb0(%0 : $String):
232233
return %13 : $String
233234
}
234235

236+
// CHECK-LABEL: sil @test_cstring
237+
// CHECK: %0 = string_literal utf8 "a"
238+
// CHECK: return %0
239+
// CHECK: } // end sil function 'test_cstring'
240+
// Builds a String from the literal "a", calls the function marked with the
// "string.getUTF8CString" semantics on it, and returns a raw pointer to the
// tail elements of the resulting array storage. The FileCheck directives above
// expect the function to return the string_literal directly instead.
sil @test_cstring : $@convention(thin) () -> Builtin.RawPointer {
241+
bb0:
242+
// Construct the String via the builtin string literal initializer.
%3 = string_literal utf8 "a"
243+
%4 = integer_literal $Builtin.Word, 1
244+
%5 = integer_literal $Builtin.Int1, -1
245+
%6 = metatype $@thin String.Type
246+
%7 = function_ref @$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC : $@convention(method) (Builtin.RawPointer, Builtin.Word, Builtin.Int1, @thin String.Type) -> @owned String
247+
%8 = apply %7(%3, %4, %5, %6) : $@convention(method) (Builtin.RawPointer, Builtin.Word, Builtin.Int1, @thin String.Type) -> @owned String
248+
// Convert the String to a ContiguousArray<Int8>.
%9 = function_ref @string_getUTF8CString : $@convention(method) (@guaranteed String) -> @owned ContiguousArray<Int8>
249+
%10 = apply %9(%8) : $@convention(method) (@guaranteed String) -> @owned ContiguousArray<Int8>
250+
// Project from the array through its buffer to the storage object, then take
// the address of the tail-allocated Int8 elements.
%11 = struct_extract %10 : $ContiguousArray<Int8>, #ContiguousArray._buffer
251+
%12 = struct_extract %11 : $_ContiguousArrayBuffer<Int8>, #_ContiguousArrayBuffer._storage
252+
%13 = ref_tail_addr %12 : $__ContiguousArrayStorageBase, $Int8
253+
%14 = address_to_pointer %13 : $*Int8 to $Builtin.RawPointer
254+
return %14 : $Builtin.RawPointer
255+
}
256+

0 commit comments

Comments
 (0)