Skip to content

Commit 33d86d5

Browse files
committed
[byteTree] Write fixed sized data using an optimized form
If we know the size of a type at compile time (like we do for all the integer types), it is cheaper to assign the data buffer directly instead of using a memcpy.
1 parent f97d13d commit 33d86d5

File tree

5 files changed

+123
-38
lines changed

5 files changed

+123
-38
lines changed

include/swift/Basic/ByteTreeSerialization.h

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
#include "llvm/Support/BinaryStreamError.h"
2323
#include "llvm/Support/BinaryStreamWriter.h"
24+
#include "swift/Basic/ExponentialGrowthAppendingBinaryByteStream.h"
2425
#include <map>
2526

2627
namespace {
@@ -35,7 +36,7 @@ class ByteTreeWriter;
3536

3637
using UserInfoMap = std::map<void *, void *>;
3738

38-
/// Add a template specialization of \c ObjectTraits for any that type
39+
/// Add a template specialization of \c ObjectTraits for any type that
3940
/// serializes as an object consisting of multiple fields.
4041
template <class T>
4142
struct ObjectTraits {
@@ -55,7 +56,7 @@ struct ObjectTraits {
5556
// UserInfoMap &UserInfo);
5657
};
5758

58-
/// Add a template specialization of \c ScalarTraits for any that type
59+
/// Add a template specialization of \c ScalarTraits for any type that
5960
/// serializes into a raw set of bytes.
6061
template <class T>
6162
struct ScalarTraits {
@@ -70,7 +71,17 @@ struct ScalarTraits {
7071
// static llvm::Error write(llvm::BinaryStreamWriter &Writer, const T &Value);
7172
};
7273

73-
/// Add a template specialization of \c WrapperTypeTraits for any that type
74+
/// Specialize \c DirectlyEncodable for every type whose serialized form is
/// identical to its binary representation on the serializing machine.
///
/// The primary template intentionally declares no \c value member, so
/// overloads constrained on \c DirectlyEncodable<T>::value drop out of
/// overload resolution for types without a specialization.
template <typename T>
struct DirectlyEncodable {
  // A specialization must provide:
  //
  //   static bool const value = true;
};
83+
84+
/// Add a template specialization of \c WrapperTypeTraits for any type that
7485
/// serializes as a type that already has a specialization of \c ScalarTraits.
7586
/// This will typically be useful for types like enums that have a 1-to-1
7687
/// mapping to e.g. an integer.
@@ -143,6 +154,12 @@ class ByteTreeWriter {
143154
/// The writer to which the binary data is written.
144155
llvm::BinaryStreamWriter &StreamWriter;
145156

157+
/// The underlying stream of the StreamWriter. We need this reference so that
158+
/// we can call \c ExponentialGrowthAppendingBinaryByteStream.writeRaw
159+
/// which is more efficient than the generic \c writeBytes of
160+
/// \c llvm::BinaryStreamWriter since it avoids the arbitrary size memcopy.
161+
ExponentialGrowthAppendingBinaryByteStream &Stream;
162+
146163
/// The number of fields this object contains. \c UINT_MAX if it has not been
147164
/// set yet. No member may be written to the object if expected number of
148165
/// fields has not been set yet.
@@ -157,8 +174,21 @@ class ByteTreeWriter {
157174

158175
/// The \c ByteTreeWriter can only be constructed internally. Use
159176
/// \c ByteTreeWriter.write to serialize a new object.
160-
ByteTreeWriter(llvm::BinaryStreamWriter &StreamWriter, UserInfoMap &UserInfo)
161-
: StreamWriter(StreamWriter), UserInfo(UserInfo) {}
177+
/// \p Stream must be the underlying stream of \p StreamWriter.
178+
ByteTreeWriter(ExponentialGrowthAppendingBinaryByteStream &Stream,
179+
llvm::BinaryStreamWriter &StreamWriter, UserInfoMap &UserInfo)
180+
: StreamWriter(StreamWriter), Stream(Stream), UserInfo(UserInfo) {}
181+
182+
/// Write the given value to the ByteTree in the same form in which it is
183+
/// represented on the serializing machine.
184+
template <typename T>
185+
llvm::Error writeRaw(T Value) {
186+
// FIXME: We implicitly inherit the endianess of the serializing machine.
187+
// Since we're currently only supporting macOS that's not a problem for now.
188+
auto Error = Stream.writeRaw(StreamWriter.getOffset(), Value);
189+
StreamWriter.setOffset(StreamWriter.getOffset() + sizeof(T));
190+
return Error;
191+
}
162192

163193
/// Set the expected number of fields the object written by this writer is
164194
/// expected to have.
@@ -175,7 +205,7 @@ class ByteTreeWriter {
175205
// Set the most significant bit to indicate that the next construct is an
176206
// object and not a scalar.
177207
uint32_t ToWrite = NumFields | (1 << 31);
178-
auto Error = StreamWriter.writeInteger(ToWrite);
208+
auto Error = writeRaw(ToWrite);
179209
(void)Error;
180210
assert(!Error);
181211

@@ -205,11 +235,13 @@ class ByteTreeWriter {
205235
/// the stream by the specified ProtocolVersion.
206236
template <typename T>
207237
typename std::enable_if<has_ObjectTraits<T>::value, void>::type
208-
static write(uint32_t ProtocolVersion, llvm::BinaryStreamWriter &StreamWriter,
209-
const T &Object, UserInfoMap &UserInfo) {
210-
ByteTreeWriter Writer(StreamWriter, UserInfo);
238+
static write(ExponentialGrowthAppendingBinaryByteStream &Stream,
239+
uint32_t ProtocolVersion, const T &Object,
240+
UserInfoMap &UserInfo) {
241+
llvm::BinaryStreamWriter StreamWriter(Stream);
242+
ByteTreeWriter Writer(Stream, StreamWriter, UserInfo);
211243

212-
auto Error = Writer.StreamWriter.writeInteger(ProtocolVersion);
244+
auto Error = Writer.writeRaw(ProtocolVersion);
213245
(void)Error;
214246
assert(!Error);
215247

@@ -224,7 +256,7 @@ class ByteTreeWriter {
224256
write(const T &Object, unsigned Index) {
225257
validateAndIncreaseFieldIndex(Index);
226258

227-
auto ObjectWriter = ByteTreeWriter(StreamWriter, UserInfo);
259+
auto ObjectWriter = ByteTreeWriter(Stream, StreamWriter, UserInfo);
228260
ObjectWriter.setNumFields(ObjectTraits<T>::numFields(Object, UserInfo));
229261

230262
ObjectTraits<T>::write(ObjectWriter, Object, UserInfo);
@@ -240,7 +272,7 @@ class ByteTreeWriter {
240272
// bitflag that indicates if the next construct in the tree is an object
241273
// or a scalar.
242274
assert((ValueSize & ((uint32_t)1 << 31)) == 0 && "Value size too large");
243-
auto SizeError = StreamWriter.writeInteger(ValueSize);
275+
auto SizeError = writeRaw(ValueSize);
244276
(void)SizeError;
245277
assert(!SizeError);
246278

@@ -254,6 +286,21 @@ class ByteTreeWriter {
254286
"ScalarTraits<T>::size");
255287
}
256288

289+
template <typename T>
290+
typename std::enable_if<DirectlyEncodable<T>::value, void>::type
291+
write(const T &Value, unsigned Index) {
292+
validateAndIncreaseFieldIndex(Index);
293+
294+
uint32_t ValueSize = sizeof(T);
295+
auto SizeError = writeRaw(ValueSize);
296+
(void)SizeError;
297+
assert(!SizeError);
298+
299+
auto ContentError = writeRaw(Value);
300+
(void)ContentError;
301+
assert(!ContentError);
302+
}
303+
257304
template <typename T>
258305
typename std::enable_if<has_WrapperTypeTraits<T>::value, void>::type
259306
write(const T &Value, unsigned Index) {
@@ -268,30 +315,18 @@ class ByteTreeWriter {
268315
// Define serialization schemes for common types
269316

270317
template <>
271-
struct ScalarTraits<uint8_t> {
272-
static unsigned size(const uint8_t &Value) { return 1; }
273-
static llvm::Error write(llvm::BinaryStreamWriter &Writer,
274-
const uint8_t &Value) {
275-
return Writer.writeInteger(Value);
276-
}
318+
struct DirectlyEncodable<uint8_t> {
319+
static bool const value = true;
277320
};
278321

279322
template <>
280-
struct ScalarTraits<uint16_t> {
281-
static unsigned size(const uint16_t &Value) { return 2; }
282-
static llvm::Error write(llvm::BinaryStreamWriter &Writer,
283-
const uint16_t &Value) {
284-
return Writer.writeInteger(Value);
285-
}
323+
struct DirectlyEncodable<uint16_t> {
324+
static bool const value = true;
286325
};
287326

288327
template <>
289-
struct ScalarTraits<uint32_t> {
290-
static unsigned size(const uint32_t &Value) { return 4; }
291-
static llvm::Error write(llvm::BinaryStreamWriter &Writer,
292-
const uint32_t &Value) {
293-
return Writer.writeInteger(Value);
294-
}
328+
struct DirectlyEncodable<uint32_t> {
329+
static bool const value = true;
295330
};
296331

297332
template <>

include/swift/Basic/ExponentialGrowthAppendingBinaryByteStream.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,31 @@ class ExponentialGrowthAppendingBinaryByteStream
5757

5858
llvm::Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override;
5959

60+
/// This is an optimized version of \c writeBytes that assumes we know the
61+
/// size of \p Value at compile time (which in particular holds for integers).
62+
/// It does so by avoiding the memcopy that \c writeBytes requires to copy
63+
/// the arbitrarily sized Buffer to the output buffer and using a direct
64+
/// memory assignment instead.
65+
/// This assumes that the enianess of this steam is the same as the native
66+
/// endianess on the executing machine. No endianess transformations are
67+
/// performed.
68+
template<typename T>
69+
llvm::Error writeRaw(uint32_t Offset, T Value) {
70+
if (auto Error = checkOffsetForWrite(Offset, sizeof(T))) {
71+
return Error;
72+
}
73+
74+
// Resize the internal buffer if needed.
75+
uint32_t RequiredSize = Offset + sizeof(T);
76+
if (RequiredSize > Data.size()) {
77+
Data.resize(RequiredSize);
78+
}
79+
80+
*(T *)(Data.data() + Offset) = Value;
81+
82+
return llvm::Error::success();
83+
}
84+
6085
llvm::Error commit() override { return llvm::Error::success(); }
6186

6287
virtual llvm::BinaryStreamFlags getFlags() const override {

tools/SourceKit/tools/sourcekitd/lib/API/Requests.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2446,10 +2446,9 @@ void serializeSyntaxTreeAsByteTree(
24462446
swift::ExponentialGrowthAppendingBinaryByteStream Stream(
24472447
llvm::support::endianness::little);
24482448
Stream.reserve(32 * 1024);
2449-
llvm::BinaryStreamWriter Writer(Stream);
24502449
std::map<void *, void *> UserInfo;
24512450
UserInfo[swift::byteTree::UserInfoKeyReusedNodeIds] = &ReusedNodeIds;
2452-
swift::byteTree::ByteTreeWriter::write(/*ProtocolVersion=*/1, Writer,
2451+
swift::byteTree::ByteTreeWriter::write(Stream, /*ProtocolVersion=*/1,
24532452
*SyntaxTree.getRaw(), UserInfo);
24542453

24552454
std::unique_ptr<llvm::WritableMemoryBuffer> Buf =

tools/swift-syntax-test/swift-syntax-test.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -729,14 +729,15 @@ int doSerializeRawTree(const char *MainExecutablePath,
729729
return EXIT_FAILURE;
730730
}
731731

732-
llvm::AppendingBinaryByteStream Stream(llvm::support::endianness::little);
733-
llvm::BinaryStreamWriter Writer(Stream);
732+
swift::ExponentialGrowthAppendingBinaryByteStream Stream(
733+
llvm::support::endianness::little);
734+
Stream.reserve(32 * 1024);
734735
std::map<void *, void *> UserInfo;
735736
UserInfo[swift::byteTree::UserInfoKeyReusedNodeIds] = &ReusedNodeIds;
736737
if (options::AddByteTreeFields) {
737738
UserInfo[swift::byteTree::UserInfoKeyAddInvalidFields] = (void *)true;
738739
}
739-
swift::byteTree::ByteTreeWriter::write(/*ProtocolVersion=*/1, Writer,
740+
swift::byteTree::ByteTreeWriter::write(Stream, /*ProtocolVersion=*/1,
740741
*Root, UserInfo);
741742
auto OutputBufferOrError = llvm::FileOutputBuffer::create(
742743
options::OutputFilename, Stream.data().size());

unittests/Basic/ExponentialGrowthAppendingBinaryByteStreamTests.cpp

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,9 @@ TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, GrowMultipleSteps) {
116116
}
117117

118118
TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, WriteIntoMiddle) {
119-
// Test that the stream resizes correctly if we write into its middle
120-
121119
ExponentialGrowthAppendingBinaryByteStream Stream(llvm::support::little);
122120

123-
// Test that the buffer can grow multiple steps at once, e.g. 1 -> 2 -> 4
121+
// Test that the stream resizes correctly if we write into its middle
124122
std::vector<uint8_t> InitialData = {'T', 'e', 's', 't'};
125123
auto InitialDataRef = makeArrayRef(InitialData);
126124
EXPECT_THAT_ERROR(Stream.writeBytes(0, InitialDataRef), Succeeded());
@@ -144,3 +142,30 @@ TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, WriteIntoMiddle) {
144142
EXPECT_EQ(DataAfterInsertRef, Stream.data());
145143
EXPECT_EQ(6u, Stream.getLength());
146144
}
145+
146+
TEST_F(ExponentialGrowthAppendingBinaryByteStreamTest, WriteRaw) {
  ExponentialGrowthAppendingBinaryByteStream Stream(llvm::support::little);

  // Seed the stream through the regular writeBytes path.
  std::vector<uint8_t> Seed = {'H', 'e', 'l', 'l', 'o'};
  auto SeedRef = makeArrayRef(Seed);
  EXPECT_THAT_ERROR(Stream.writeBytes(0, SeedRef), Succeeded());
  EXPECT_EQ(SeedRef, Stream.data());

  // Appending a single byte with writeRaw grows the stream by one.
  EXPECT_THAT_ERROR(Stream.writeRaw(5, static_cast<uint8_t>(' ')),
                    Succeeded());
  std::vector<uint8_t> WithSpace = {'H', 'e', 'l', 'l', 'o', ' '};
  auto WithSpaceRef = makeArrayRef(WithSpace);
  EXPECT_EQ(WithSpaceRef, Stream.data());
  EXPECT_EQ(6u, Stream.getLength());

  // Appending a 32-bit value writes its four bytes in native order. The
  // expected byte sequence below assumes a little-endian host.
  uint32_t Word = 'w' | 'o' << 8 | 'r' << 16 | 'l' << 24;
  EXPECT_THAT_ERROR(Stream.writeRaw(6, Word), Succeeded());
  std::vector<uint8_t> WithWord = {'H', 'e', 'l', 'l', 'o', ' ',
                                   'w', 'o', 'r', 'l'};
  auto WithWordRef = makeArrayRef(WithWord);
  EXPECT_EQ(WithWordRef, Stream.data());
  EXPECT_EQ(10u, Stream.getLength());
}

0 commit comments

Comments
 (0)