Skip to content

Commit a84cf7f

Browse files
HassanElDesouky authored and xedin committed
[Locale] Serialize YAML to an OnDiskHashTable format and create a tool for serialization
1 parent b647245 commit a84cf7f

File tree

8 files changed

+321
-17
lines changed

8 files changed

+321
-17
lines changed

include/swift/AST/DiagnosticEngine.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "swift/AST/TypeLoc.h"
2525
#include "llvm/ADT/StringSet.h"
2626
#include "llvm/Support/Allocator.h"
27+
#include "llvm/Support/Path.h"
2728
#include "llvm/Support/VersionTuple.h"
2829

2930
namespace swift {
@@ -742,8 +743,26 @@ namespace swift {
742743
/// Select the producer of localized diagnostic messages for \p locale.
///
/// Builds the path `<path>/<locale>.db` and, if that file exists and can be
/// read, installs a `SerializedLocalizationProducer` over its contents.
/// If the `.db` file does not exist, `<path>/<locale>.yaml` is tried with a
/// `YAMLLocalizationProducer`. If neither applies, `localization` is not
/// assigned here.
///
/// \param locale Name of the locale; must not be empty.
/// \param path Directory containing the localization files; must not be
/// empty.
void setLocalization(std::string locale, std::string path) {
743744
assert(!locale.empty());
744745
assert(!path.empty());
745-
localization =
746-
std::make_unique<diag::YAMLLocalizationProducer>(locale, path);
746+
llvm::SmallString<128> filePath(path);
747+
llvm::sys::path::append(filePath, locale);
748+
llvm::sys::path::replace_extension(filePath, ".db");
749+
750+
// If the serialized diagnostics file is not available,
751+
// fall back to the `YAML` file.
752+
if (llvm::sys::fs::exists(filePath)) {
753+
// NOTE: when the `.db` file exists but cannot be read, no YAML fallback is
// attempted and `localization` is left as it was.
if (auto file = llvm::MemoryBuffer::getFile(filePath)) {
754+
localization = std::make_unique<diag::SerializedLocalizationProducer>(
755+
std::move(file.get()));
756+
}
757+
} else {
758+
llvm::sys::path::replace_extension(filePath, ".yaml");
759+
// In case of missing localization files, we should fall back to messages
760+
// from `.def` files.
761+
if (llvm::sys::fs::exists(filePath)) {
762+
localization =
763+
std::make_unique<diag::YAMLLocalizationProducer>(filePath.str());
764+
}
765+
}
747766
}
748767

749768
void ignoreDiagnostic(DiagID id) {

include/swift/AST/LocalizationFormat.h

Lines changed: 147 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
//===--- LocalizationFormat.h - YAML format for Diagnostic Messages ---*-
2-
// C++ -*-===//
1+
//===--- LocalizationFormat.h - Format for Diagnostic Messages --*- C++ -*-===//
32
//
43
// This source file is part of the Swift.org open source project
54
//
@@ -19,6 +18,10 @@
1918
#define SWIFT_LOCALIZATIONFORMAT_H
2019

2120
#include "llvm/ADT/StringRef.h"
21+
#include "llvm/Bitstream/BitstreamReader.h"
22+
#include "llvm/Support/DJB.h"
23+
#include "llvm/Support/MemoryBuffer.h"
24+
#include "llvm/Support/OnDiskHashTable.h"
2225
#include "llvm/Support/YAMLParser.h"
2326
#include "llvm/Support/YAMLTraits.h"
2427
#include <string>
@@ -28,6 +31,101 @@ namespace swift {
2831
enum class DiagID : uint32_t;
2932

3033
namespace diag {
34+
using namespace llvm::support;
35+
36+
/// Trait type used to instantiate `llvm::OnDiskChainedHashTableGenerator`
/// (see `SerializedLocalizationWriter`). It describes how keys and values,
/// both `llvm::StringRef`, are hashed and written to the output stream.
class LocalizationWriterInfo {
37+
public:
38+
using key_type = llvm::StringRef;
39+
using key_type_ref = key_type;
40+
using data_type = llvm::StringRef;
41+
using data_type_ref = data_type;
42+
using hash_value_type = uint32_t;
43+
using offset_type = uint32_t;
44+
45+
/// Hash \p key with `llvm::djbHash`; `LocalizationReaderInfo::ComputeHash`
/// uses the same function.
hash_value_type ComputeHash(key_type_ref key) { return llvm::djbHash(key); }
46+
47+
/// Write the size of \p key followed by the size of \p data, each as a
/// little-endian `offset_type`.
///
/// \returns the pair {key length, data length} that was written.
std::pair<unsigned, unsigned> EmitKeyDataLength(llvm::raw_ostream &out,
48+
key_type_ref key,
49+
data_type_ref data) {
50+
offset_type keyLength = static_cast<offset_type>(key.size());
51+
offset_type dataLength = static_cast<offset_type>(data.size());
52+
endian::write<offset_type>(out, keyLength, little);
53+
endian::write<offset_type>(out, dataLength, little);
54+
return {keyLength, dataLength};
55+
}
56+
57+
/// Write the bytes of \p key to \p out. \p len is not used.
void EmitKey(llvm::raw_ostream &out, key_type_ref key, unsigned len) {
58+
out << key;
59+
}
60+
61+
/// Write the bytes of \p data to \p out. \p key and \p len are not used.
void EmitData(llvm::raw_ostream &out, key_type_ref key, data_type_ref data,
62+
unsigned len) {
63+
out << data;
64+
}
65+
};
66+
67+
/// Trait type used to instantiate `llvm::OnDiskIterableChainedHashTable`
/// (see `SerializedLocalizationProducer`). It describes how keys and values,
/// both `llvm::StringRef`, are hashed, compared and read back from memory.
class LocalizationReaderInfo {
68+
public:
69+
using internal_key_type = llvm::StringRef;
70+
using external_key_type = internal_key_type;
71+
using data_type = llvm::StringRef;
72+
using hash_value_type = uint32_t;
73+
using offset_type = uint32_t;
74+
75+
/// Internal and external keys are the same type, so this is the identity.
internal_key_type GetInternalKey(external_key_type key) { return key; }
76+
77+
/// Internal and external keys are the same type, so this is the identity.
external_key_type GetExternalKey(internal_key_type key) { return key; }
78+
79+
/// Keys are equal when the two strings compare equal.
static bool EqualKey(internal_key_type lhs, internal_key_type rhs) {
80+
return lhs == rhs;
81+
}
82+
83+
/// Hash \p key with `llvm::djbHash`, the same function used by
/// `LocalizationWriterInfo::ComputeHash`.
hash_value_type ComputeHash(internal_key_type key) {
84+
return llvm::djbHash(key);
85+
}
86+
87+
/// Read the key length and then the data length from \p data, each as an
/// unaligned little-endian `offset_type`. \p data is taken by reference and
/// passed to `readNext` for both reads.
///
/// \returns the pair {key length, data length}.
static std::pair<unsigned, unsigned> ReadKeyDataLength(const uint8_t *&data) {
88+
offset_type keyLength =
89+
endian::readNext<offset_type, little, unaligned>(data);
90+
offset_type dataLength =
91+
endian::readNext<offset_type, little, unaligned>(data);
92+
return {keyLength, dataLength};
93+
}
94+
95+
/// View \p length bytes starting at \p data as a key; nothing is copied.
internal_key_type ReadKey(const uint8_t *data, offset_type length) {
96+
return internal_key_type((const char *)data, length);
97+
}
98+
99+
/// View \p length bytes starting at \p data as a value; nothing is copied
/// and \p Key is not used.
data_type ReadData(llvm::StringRef Key, const uint8_t *data,
100+
offset_type length) {
101+
return data_type((const char *)data, length);
102+
}
103+
};
104+
105+
/// Collects diagnostic translations and writes them out as an on-disk hash
/// table keyed by diagnostic identifier.
class SerializedLocalizationWriter {
106+
using offset_type = LocalizationWriterInfo::offset_type;
107+
// Accumulates the (id, translation) pairs until `emit` writes them out.
llvm::OnDiskChainedHashTableGenerator<LocalizationWriterInfo> generator;
108+
109+
public:
110+
/// Enqueue the given diagnostic to be included in a serialized translations
111+
/// file.
112+
///
113+
/// \param id The identifier associated with the given diagnostic message e.g.
114+
/// 'cannot_convert_argument'.
115+
/// \param translation The localized diagnostic
116+
/// message for the given identifier.
117+
void insert(llvm::StringRef id, llvm::StringRef translation);
118+
119+
/// Write out previously inserted diagnostic translations into the given
120+
/// location.
121+
///
122+
/// \param filePath The location of the serialized diagnostics file. It's
123+
/// supposed to be a file with '.db' postfix.
124+
/// \returns true if an error occurred while writing the file, false if all
125+
/// diagnostic messages have been successfully serialized.
126+
bool emit(llvm::StringRef filePath);
127+
};
128+
31129
class LocalizationProducer {
32130
public:
33131
/// If the message isn't available/localized in the current `yaml` file,
@@ -41,9 +139,55 @@ class LocalizationProducer {
41139
};
42140

43141
/// Localization producer backed by a YAML file. It holds the messages in the
/// `diagnostics` vector and exposes `begin()`/`end()` to walk them together
/// with their position in that vector.
class YAMLLocalizationProducer final : public LocalizationProducer {
142+
// Type of the `diagnostics` vector.
143+
using T = std::vector<std::string>;
144+
// Element produced when dereferencing `iterator`: the running index paired
// with a reference to the message stored in `diagnostics`.
struct Node {
145+
uint32_t id;
146+
typename T::value_type &msg;
147+
};
148+
typedef Node value_type;
149+
150+
// Wraps a `T::iterator` and counts how many times it has been incremented.
class iterator {
151+
typename T::iterator it;
152+
uint32_t counter;
153+
154+
public:
155+
iterator(T::iterator _it, uint32_t counter = 0)
156+
: it(_it), counter(counter) {}
157+
158+
// Pre-increment: advances both the wrapped iterator and the counter and
// returns a copy of the advanced state by value.
iterator operator++() { return iterator(++it, ++counter); }
159+
160+
// Only the wrapped iterators are compared; the counter is ignored, so an
// `end()` iterator constructed with counter 0 still terminates a loop.
bool operator!=(iterator other) { return it != other.it; }
161+
162+
// Returns a copy of the current message.
typename T::iterator::value_type node() { return *it; }
163+
164+
// Returns the current index together with a reference to the message.
value_type operator*() { return value_type{counter, *it}; }
165+
166+
// Number of increments applied since construction (plus the initial value).
uint32_t index() { return counter; }
167+
};
168+
44169
public:
45170
std::vector<std::string> diagnostics;
46-
explicit YAMLLocalizationProducer(std::string locale, std::string path);
171+
/// \param filePath Path of the YAML file to read the messages from.
explicit YAMLLocalizationProducer(llvm::StringRef filePath);
172+
llvm::StringRef getMessageOr(swift::DiagID id,
173+
llvm::StringRef defaultMessage) const override;
174+
175+
/// Iterator positioned at the first entry of `diagnostics`, index 0.
iterator begin() { return iterator(diagnostics.begin()); }
176+
177+
/// Past-the-end iterator over `diagnostics`.
iterator end() { return iterator(diagnostics.end()); }
178+
};
179+
180+
/// Localization producer backed by a serialized on-disk hash table that maps
/// diagnostic identifiers to translated messages.
class SerializedLocalizationProducer final : public LocalizationProducer {
181+
using SerializedLocalizationTable =
182+
llvm::OnDiskIterableChainedHashTable<LocalizationReaderInfo>;
183+
using offset_type = LocalizationReaderInfo::offset_type;
184+
// Owns the bytes of the serialized file; `SerializedTable` is created over
// pointers into this buffer.
std::unique_ptr<llvm::MemoryBuffer> Buffer;
185+
std::unique_ptr<SerializedLocalizationTable> SerializedTable;
186+
187+
public:
188+
/// \param buffer Contents of a serialized diagnostics file; ownership is
/// taken by the producer.
explicit SerializedLocalizationProducer(
189+
std::unique_ptr<llvm::MemoryBuffer> buffer);
190+
47191
/// Look up the message for \p id in the serialized table, returning
/// \p defaultMessage when the stored message is empty.
llvm::StringRef getMessageOr(swift::DiagID id,
48192
llvm::StringRef defaultMessage) const override;
49193
};

lib/AST/LocalizationFormat.cpp

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
//===--- LocalizationFormat.cpp - YAML format for Diagnostic Messages ---*-
2-
// C++ -*-===//
1+
//===-- LocalizationFormat.cpp - Format for Diagnostic Messages -*- C++ -*-===//
32
//
43
// This source file is part of the Swift.org open source project
54
//
@@ -18,6 +17,7 @@
1817
#include "swift/AST/LocalizationFormat.h"
1918
#include "llvm/ADT/SmallString.h"
2019
#include "llvm/ADT/StringRef.h"
20+
#include "llvm/Bitstream/BitstreamReader.h"
2121
#include "llvm/Support/CommandLine.h"
2222
#include "llvm/Support/MemoryBuffer.h"
2323
#include "llvm/Support/YAMLParser.h"
@@ -32,6 +32,11 @@ enum LocalDiagID : uint32_t {
3232
NumDiags
3333
};
3434

35+
// Table of diagnostic identifier names: every `DIAG` entry pulled in from
// `DiagnosticsAll.def` contributes its `ID` argument as a string literal.
// `SerializedLocalizationProducer::getMessageOr` indexes it with the numeric
// value of a `DiagID` to obtain the lookup key.
static constexpr const char *const diagnosticID[] = {
36+
#define DIAG(KIND, ID, Options, Text, Signature) #ID,
37+
#include "swift/AST/DiagnosticsAll.def"
38+
};
39+
3540
struct DiagnosticNode {
3641
uint32_t id;
3742
std::string msg;
@@ -68,15 +73,52 @@ template <> struct MappingTraits<DiagnosticNode> {
6873
namespace swift {
6974
namespace diag {
7075

71-
YAMLLocalizationProducer::YAMLLocalizationProducer(std::string locale,
72-
std::string path) {
73-
llvm::SmallString<128> DiagnosticsFilePath(path);
74-
llvm::sys::path::append(DiagnosticsFilePath, locale);
75-
llvm::sys::path::replace_extension(DiagnosticsFilePath, ".yaml");
76-
auto FileBufOrErr = llvm::MemoryBuffer::getFileOrSTDIN(DiagnosticsFilePath);
77-
// Absence of localizations shouldn't crash the compiler.
78-
if (!FileBufOrErr)
79-
return;
76+
// Queue the (id, translation) pair in the hash table generator; nothing is
// written to disk until `emit` is called.
void SerializedLocalizationWriter::insert(llvm::StringRef id,
77+
llvm::StringRef translation) {
78+
generator.insert(id, translation);
79+
}
80+
81+
/// Write every translation queued through `insert` to \p filePath.
///
/// The file starts with a little-endian `offset_type` header holding the
/// value returned by `generator.Emit`, followed by the data the generator
/// writes. The header is first written as 0 and patched once the real value
/// is known.
///
/// \param filePath Destination file; must carry the `.db` extension.
/// \returns true if an error occurred (the file could not be opened or a
/// write failed), false on success.
bool SerializedLocalizationWriter::emit(llvm::StringRef filePath) {
  assert(llvm::sys::path::extension(filePath) == ".db");
  std::error_code error;
  llvm::raw_fd_ostream OS(filePath, error, llvm::sys::fs::F_None);
  // A failed open is reported through `error`, not through the stream's own
  // error state, and the stream must not be written to in that case.
  if (error) {
    return true;
  }

  // Reserve room for the header; it is overwritten below.
  llvm::support::endian::write<offset_type>(OS, 0, llvm::support::little);
  offset_type offset = generator.Emit(OS);

  // Go back to the start of the file and store the real header value.
  OS.seek(0);
  llvm::support::endian::write(OS, offset, llvm::support::little);
  OS.close();

  return OS.has_error();
}
100+
101+
// Take ownership of `buffer` and build the lookup table over its bytes.
// The first `offset_type` of the buffer is read as a little-endian offset;
// the table is created with `base + tableOffset` as its first argument,
// the byte right after that header as its second, and `base` as its third.
// NOTE(review): neither the buffer size nor `tableOffset` is validated here,
// so this presumably relies on the file having been produced by
// `SerializedLocalizationWriter::emit` - confirm.
SerializedLocalizationProducer::SerializedLocalizationProducer(
102+
std::unique_ptr<llvm::MemoryBuffer> buffer)
103+
: Buffer(std::move(buffer)) {
104+
auto base = reinterpret_cast<const uint8_t *>(Buffer.get()->getBufferStart());
105+
auto tableOffset = endian::read<offset_type>(base, little);
106+
SerializedTable.reset(SerializedLocalizationTable::Create(
107+
base + tableOffset, base + sizeof(offset_type), base));
108+
}
109+
110+
// Return the serialized translation for `id`, or `defaultMessage` when the
// stored message is empty. The numeric value of `id` selects the identifier
// name from `diagnosticID`, and that name is the key looked up in the table.
llvm::StringRef SerializedLocalizationProducer::getMessageOr(
111+
swift::DiagID id, llvm::StringRef defaultMessage) const {
112+
auto value = SerializedTable.get()->find(diagnosticID[(unsigned)id]);
113+
// NOTE(review): the result of `find` is used without comparing it against
// the table's end; presumably a miss yields a null pointer and zero length,
// which makes the string below empty - confirm.
llvm::StringRef diagnosticMessage((const char *)value.getDataPtr(),
114+
value.getDataLen());
115+
if (diagnosticMessage.empty())
116+
return defaultMessage;
117+
return diagnosticMessage;
118+
}
119+
120+
YAMLLocalizationProducer::YAMLLocalizationProducer(llvm::StringRef filePath) {
121+
auto FileBufOrErr = llvm::MemoryBuffer::getFileOrSTDIN(filePath);
80122
llvm::MemoryBuffer *document = FileBufOrErr->get();
81123
diag::LocalizationInput yin(document->getBuffer());
82124
yin >> diagnostics;

localization/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ add_custom_command(
66
DEPENDS diagnostics
77
COMMAND "${CMAKE_COMMAND}" -E copy_directory diagnostics/ "${SWIFT_BINARY_DIR}/share/swift/diagnostics/")
88

9+
# Attach to the diagnostic-translation-database target a command that runs
# swift-serialize-diagnostics on the copied en.yaml and writes its output into
# the same share/swift/diagnostics/ directory.
add_custom_command(
10+
TARGET diagnostic-translation-database
11+
COMMAND swift-serialize-diagnostics --input-file-path="${SWIFT_BINARY_DIR}/share/swift/diagnostics/en.yaml" --output-directory="${SWIFT_BINARY_DIR}/share/swift/diagnostics/")
12+
913
add_dependencies(swift-frontend diagnostic-translation-database)
1014

1115
swift_install_in_component(

test/diagnostics/Localization/fr_localization.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
// RUN: %target-typecheck-verify-swift -localization-path %S/Inputs -locale fr
1+
// RUN: %empty-directory(%t)
2+
// RUN: swift-serialize-diagnostics --input-file-path=%S/Inputs/fr.yaml --output-directory=%t/
3+
// RUN: swift-serialize-diagnostics --input-file-path=%S/Inputs/en.yaml --output-directory=%t/
4+
// RUN: %target-typecheck-verify-swift -localization-path %t -locale fr
25

36
_ = "HI!
47
// expected-error@-1{{chaîne non terminée littérale}}

tools/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ add_swift_tool_subdirectory(swift-ide-test)
2222
add_swift_tool_subdirectory(swift-remoteast-test)
2323
add_swift_tool_subdirectory(swift-demangle)
2424
add_swift_tool_subdirectory(swift-demangle-yamldump)
25+
add_swift_tool_subdirectory(swift-serialize-diagnostics)
2526
add_swift_tool_subdirectory(lldb-moduleimport-test)
2627
add_swift_tool_subdirectory(sil-func-extractor)
2728
add_swift_tool_subdirectory(sil-llvm-gen)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Build the swift-serialize-diagnostics host tool from its single source file
# as part of the `tools` component, linking it privately against swiftAST.
add_swift_host_tool(swift-serialize-diagnostics
2+
swift-serialize-diagnostics.cpp
3+
SWIFT_COMPONENT tools
4+
)
5+
target_link_libraries(swift-serialize-diagnostics
6+
PRIVATE
7+
swiftAST)

0 commit comments

Comments
 (0)