|
| 1 | +from collections import OrderedDict |
| 2 | +from io import TextIOWrapper |
| 3 | +from typing import List, Union |
| 4 | + |
| 5 | + |
def to_raw_cstring(value: Union[str, List[str]]) -> str:
    """Render text as one or more adjacent C++ raw string literals.

    A list is joined with newlines and given a trailing newline before
    encoding. The UTF-8 bytes are cut into segments of at most MAX_LITERAL
    bytes, and every segment is emitted as ``R"<!>(...)<!>"``.

    Args:
        value: The text to embed, or a list of lines.

    Returns:
        A single raw string literal when the text fits in one segment;
        otherwise all literals separated by spaces and wrapped in parentheses.

    Note:
        Text containing the closing delimiter ``)<!>"`` is not escaped and
        would end the literal early.
    """
    # Upper bound, in bytes, on a single emitted literal.
    MAX_LITERAL = 35 * 1024

    if isinstance(value, list):
        value = "\n".join(value) + "\n"

    encoded = value.encode()
    total = len(encoded)
    segments: List[bytes] = []
    offset = 0

    # Strict bound: a text whose length lands exactly on a segment boundary
    # must not produce an extra empty literal at the end.
    while offset < total:
        end = min(offset + MAX_LITERAL, total)
        if end - offset == MAX_LITERAL:
            # Try to segment raw strings at double newlines to keep readable.
            pretty_break = encoded.rfind(b"\n\n", offset, end)
            if pretty_break != -1:
                # Keep the first newline in this segment; the second one
                # starts the next segment.
                end = pretty_break + 1
            else:
                # If none found, ensure we end with valid UTF-8: a cut is in
                # the middle of a character exactly when the byte right after
                # it is a continuation byte (10xxxxxx), so back off until it
                # is not.
                while end < total and (encoded[end] & 0b11000000) == 0b10000000:
                    end -= 1
        segments.append(encoded[offset:end])
        offset = end

    if not segments:
        # Empty input still yields one (empty) literal.
        segments.append(b"")

    literals = [f'R"<!>({segment.decode()})<!>"' for segment in segments]
    if len(literals) == 1:
        return literals[0]
    # Wrap multiple segments in parenthesis to suppress `string-concatenation` warnings on clang.
    return "({})".format(" ".join(literals))
| 48 | + |
| 49 | + |
def get_license_info(src_copyright):
    """Parse a tagged copyright file into project, part and license data.

    The file is read as UTF-8. Lines starting with ``#`` are skipped. A line
    containing ``:`` starts a tag whose value continues over following lines
    that begin with a space; a line without ``:`` ends the current paragraph.
    (The layout appears to follow the Debian machine-readable copyright
    format -- confirm against the actual input file.)

    Args:
        src_copyright: Path of the copyright file to read.

    Returns:
        A dict with the keys:
        ``data``      flat list of all ``Files`` then ``Copyright`` lines of
                      every part, in project order;
        ``projects``  OrderedDict mapping a ``Comment`` value to its list of
                      part dicts (each annotated with ``file_index`` and
                      ``copyright_index`` into ``data``);
        ``parts``     the last part dict processed, or an empty dict when no
                      project was found;
        ``licenses``  list of ``License`` line lists from paragraphs that
                      have no ``Files`` tag.
    """

    class LicenseReader:
        """Line cursor over the copyright file that skips ``#`` lines."""

        def __init__(self, license_file: TextIOWrapper):
            self._license_file = license_file
            self.line_num = 0
            self.current = self.next_line()

        def next_line(self):
            """Advance to the next non-comment line and return it ('' at EOF)."""
            while True:
                fetched = self._license_file.readline()
                self.line_num += 1
                if not fetched.startswith("#"):
                    break
            self.current = fetched
            return fetched

        def next_tag(self):
            """Consume the tag at the cursor and return ``(name, value_lines)``.

            Returns ``("", [])`` without advancing when the current line has
            no ``:`` separator.
            """
            name, separator, remainder = self.current.partition(":")
            if not separator:
                return ("", [])
            values = [remainder.strip()]
            # Continuation lines are the ones that start with a space.
            while self.next_line() and self.current.startswith(" "):
                values.append(self.current.strip())
            return (name, values)

    projects = OrderedDict()
    license_list = []

    with open(src_copyright, "r", encoding="utf-8") as copyright_file:
        reader = LicenseReader(copyright_file)
        part = {}
        while reader.current:
            tag, content = reader.next_tag()
            if tag == "Comment":
                if part:
                    # Attach the non-empty part to the named project. The same
                    # dict keeps receiving later tags of this paragraph.
                    projects.setdefault(content[0], []).append(part)
            elif tag in ("Files", "Copyright", "License"):
                part[tag] = list(content)

            if tag and reader.current:
                # Still inside the same paragraph.
                continue

            # End of a paragraph (or of the file): start a new part.
            if "License" in part and "Files" not in part:
                # No Files tag in this one, so assume a standalone license.
                license_list.append(part["License"])
            part = {}
            reader.next_line()

    data_list: list = []
    for project in projects.values():
        for part in project:
            part["file_index"] = len(data_list)
            data_list.extend(part["Files"])
            part["copyright_index"] = len(data_list)
            data_list.extend(part["Copyright"])

    # NOTE(review): "parts" is whatever `part` was bound to last (the final
    # part of the final project, or {} when there are none) -- confirm that
    # this is what callers expect.
    return {"data": data_list, "projects": projects, "parts": part, "licenses": license_list}
| 106 | + |
| 107 | + |
def license_builder(target, source, env):
    """Generate a C++ header embedding the license text and copyright data.

    The signature has the shape of a build action (presumably invoked by the
    build system with node lists -- confirm against the build scripts).

    Args:
        target: Sequence whose first item, converted with ``str``, is the
            path of the header to write.
        source: Sequence whose first item is the path of the copyright file
            (parsed with ``get_license_info``) and whose second item is the
            path of the license text file.
        env: Mapping-like object; ``env.get("name_prefix", "project")``
            selects the prefix used for the generated identifiers.

    The header is written as UTF-8 with ``\\n`` line endings. It is fully
    built in memory first, so a failure while formatting does not leave a
    truncated file behind.
    """
    name_prefix = env.get("name_prefix", "project")
    prefix_upper = name_prefix.upper()
    prefix_capital = name_prefix.capitalize()

    license_text_name = f"{prefix_upper}_LICENSE_TEXT"
    component_copyright_part_name = f"{prefix_capital}ComponentCopyrightPart"
    component_copyright_name = f"{prefix_capital}ComponentCopyright"
    copyright_data_name = f"{prefix_upper}_COPYRIGHT_DATA"
    copyright_parts_name = f"{prefix_upper}_COPYRIGHT_PARTS"
    copyright_info_name = f"{prefix_upper}_COPYRIGHT_INFO"
    license_name = f"{prefix_capital}License"
    licenses_name = f"{prefix_upper}_LICENSES"

    src_copyright = get_license_info(str(source[0]))
    src_license = str(source[1])

    with open(src_license, "r", encoding="utf-8") as file:
        license_text = file.read()

    C_ESCAPABLES = [
        ("\\", "\\\\"),
        ("\a", "\\a"),
        ("\b", "\\b"),
        ("\f", "\\f"),
        ("\n", "\\n"),
        ("\r", "\\r"),
        ("\t", "\\t"),
        ("\v", "\\v"),
        # ("'", "\\'"),  # Skip, as we're only dealing with full strings.
        ('"', '\\"'),
    ]
    C_ESCAPE_TABLE = str.maketrans(dict(C_ESCAPABLES))

    def c_escape(text: str) -> str:
        """Escape text for use inside a double-quoted C string literal."""
        return text.translate(C_ESCAPE_TABLE)

    projects = src_copyright["projects"]

    # Index of each project's first part in the flattened parts array.
    # Computed up front so the row builders do not depend on call order.
    part_indexes = {}
    next_part_index = 0
    for project_name, project in projects.items():
        part_indexes[project_name] = next_part_index
        next_part_index += len(project)

    def copyright_data_str() -> str:
        """Rows of the flat string array holding file and copyright lines."""
        # Escaped like every other emitted string: a quote or backslash in a
        # copyright line must not break the generated literal.
        return "".join(f'\t\t"{c_escape(line)}",\n' for line in src_copyright["data"])

    def copyright_part_str() -> str:
        """Rows of the parts array: license name plus file/copyright spans."""
        return "".join(
            f'\t\t{{ "{c_escape(part["License"][0])}", '
            f"{{ &{copyright_data_name}[{part['file_index']}], {len(part['Files'])} }}, "
            f"{{ &{copyright_data_name}[{part['copyright_index']}], {len(part['Copyright'])} }} }},\n"
            for project in projects.values()
            for part in project
        )

    def copyright_info_str() -> str:
        """Rows of the project array: name, span of parts, and part count."""
        # The trailing count fills the struct's `part_count` member, which
        # would otherwise be left zero-initialised.
        return "".join(
            f'\t\t{{ "{c_escape(project_name)}", '
            f"{{ &{copyright_parts_name}[{part_indexes[project_name]}], {len(project)} }}, "
            f"{len(project)} }},\n"
            for project_name, project in projects.items()
        )

    def license_list_str() -> str:
        """Rows of the license array: name plus body as raw string literal."""
        rows = []
        for license_entry in src_copyright["licenses"]:
            # A lone "." stands for an empty line in the license body.
            body_lines = ["" if line == "." else line for line in license_entry[1:]]
            rows.append(f'\t\t{{ "{c_escape(license_entry[0])}", \n{to_raw_cstring(body_lines)} }}, \n')
        return "".join(rows)

    header = f"""\
#pragma once

#include <array>
#include <span>
#include <string_view>

namespace OpenVic {{
\tstatic constexpr std::string_view {license_text_name} = //
{to_raw_cstring(license_text)};

\tstruct {component_copyright_part_name} {{
\t\tstd::string_view license;
\t\tstd::span<const std::string_view> files;
\t\tstd::span<const std::string_view> copyright_statements;
\t}};

\tstruct {component_copyright_name} {{
\t\tstd::string_view name;
\t\tstd::span<const {component_copyright_part_name}> parts;
\t\tint part_count;
\t}};

\tstatic constexpr std::array {copyright_data_name} = std::to_array<std::string_view>({{
{copyright_data_str()}\t}});
\tstatic constexpr std::array {copyright_parts_name} = std::to_array<{component_copyright_part_name}>({{
{copyright_part_str()}\t}});

\tstatic constexpr std::array {copyright_info_name} = std::to_array<{component_copyright_name}>({{
{copyright_info_str()}\t}});

\tstruct {license_name} {{
\t\tstd::string_view license_name;
\t\tstd::string_view license_body;
\t}};

\tstatic constexpr std::array {licenses_name} = std::to_array<{license_name}>({{
{license_list_str()}\t}});
}}
"""

    with open(str(target[0]), "wt", encoding="utf-8", newline="\n") as file:
        file.write("/* THIS FILE IS GENERATED. EDITS WILL BE LOST. */\n\n")
        file.write(header)
0 commit comments