-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathload_codepoints.h
More file actions
103 lines (84 loc) · 3.19 KB
/
load_codepoints.h
File metadata and controls
103 lines (84 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#ifndef IFT_CONFIG_LOAD_CODEPOINTS_H_
#define IFT_CONFIG_LOAD_CODEPOINTS_H_
#include <optional>
#include <vector>
#include "absl/container/btree_set.h"
#include "absl/container/flat_hash_map.h"
#include "absl/status/statusor.h"
#include "ift/common/font_data.h"
#include "ift/common/font_helper.h"
#include "ift/common/int_set.h"
#include "ift/freq/unicode_frequencies.h"
namespace ift::config {
// Collects every entry of proto_set.values() into a newly built IntSet.
//
// T only needs to expose a values() accessor that can be iterated and whose
// elements convert to uint32_t.
template <typename T>
ift::common::IntSet Values(const T& proto_set) {
  ift::common::IntSet collected;
  auto&& entries = proto_set.values();
  for (uint32_t value : entries) {
    collected.insert(value);
  }
  return collected;
}
// Converts every entry of proto_set.values() with FontHelper::ToTag() and
// gathers the resulting tags into an ordered set.
//
// T only needs to expose a values() accessor that can be iterated and whose
// elements are accepted by FontHelper::ToTag().
template <typename T>
absl::btree_set<hb_tag_t> TagValues(const T& proto_set) {
  absl::btree_set<hb_tag_t> tags;
  auto&& names = proto_set.values();
  for (const auto& name : names) {
    tags.insert(ift::common::FontHelper::ToTag(name));
  }
  return tags;
}
// Loads the file at path and returns its binary contents.
//
// The contents are returned as a FontData wrapped in absl::StatusOr; which
// error statuses are produced on failure is decided by the implementation,
// which is not part of this header.
absl::StatusOr<ift::common::FontData> LoadFile(const char* path);
// Loads a Riegeli file of CodepointCount protos and returns a
// UnicodeFrequencies instance.
//
// Append "@*" to the path to load all sharded files for this path.
// For example, "FrequencyData.riegeli@*" will load all files of the
// form FrequencyData.riegeli-*-of-* into the frequency data set.
//
// Failures are reported through the returned absl::StatusOr.
absl::StatusOr<ift::freq::UnicodeFrequencies> LoadFrequenciesFromRiegeli(
const char* path);
// Loads frequency data from https://github.com/w3c/ift-encoder-data
//
// name is the file name to load.
// Append "@*" to the name to load all sharded files for that name.
//
// Failures are reported through the returned absl::StatusOr.
absl::StatusOr<ift::freq::UnicodeFrequencies> LoadBuiltInFrequencies(
const char* name);
// Returns a list of all built-in frequency data sets and the codepoints
// they cover.
//
// The result is a map from a string key (presumably the data set name --
// confirm against the implementation) to the set of codepoints covered by
// that data set. Failures are reported through the returned absl::StatusOr.
absl::StatusOr<absl::flat_hash_map<std::string, ift::common::CodepointSet>>
BuiltInFrequenciesList();
// Expands a possibly sharded file path into a list of paths.
//
// If path ends with "@*", it is expanded into the list of paths matching the
// pattern: <path>-?????-of-?????
// Otherwise the result is just the input path.
//
// Checks that the input path exists and returns a NotFoundError if it does
// not.
absl::StatusOr<std::vector<std::string>> ExpandShardedPath(const char* path);
// A codepoint paired with an optional frequency.
//
// Ordering (operator<):
// - Entries that have a frequency sort before entries that do not.
// - Entries with different frequencies sort from highest to lowest frequency.
// - Entries with identical frequency state (the same value, or both without
//   a frequency) sort by ascending codepoint.
struct CodepointAndFrequency {
  // Zero-initialized so that a default-constructed instance never holds an
  // indeterminate value (reading one would be undefined behaviour). The
  // struct remains an aggregate, so brace initialization keeps working.
  uint32_t codepoint = 0;
  // Frequency for the codepoint; empty when none was supplied.
  std::optional<uint64_t> frequency;

  bool operator<(const CodepointAndFrequency& rhs) const {
    // Same frequency value, or both absent: tie-break on the codepoint.
    if (frequency == rhs.frequency) {
      return codepoint < rhs.codepoint;
    }
    // Exactly one side has a frequency: the side that has one sorts first.
    if (frequency.has_value() != rhs.frequency.has_value()) {
      return frequency.has_value();
    }
    // Both sides have (different) frequencies.
    // Sort from highest to lowest frequency.
    return *frequency > *rhs.frequency;
  }

  // Printing hook, declared here and defined outside this header
  // (presumably the GoogleTest PrintTo customization point -- confirm).
  friend void PrintTo(const CodepointAndFrequency& point, std::ostream* os);
};
// Loads the codepoint file at path and returns its contents.
//
// - Retains the ordering and any duplicate codepoints listed in the original
// file.
// - A codepoint file has one codepoint per line in hexadecimal form 0xXXXX.
// - An optional frequency can be provided as a second column, comma separated.
// - Lines starting with "#" are ignored.
//
// Failures are reported through the returned absl::StatusOr.
absl::StatusOr<std::vector<CodepointAndFrequency>> LoadCodepointsOrdered(
const char* path);
} // namespace ift::config
#endif // IFT_CONFIG_LOAD_CODEPOINTS_H_