Skip to content

Commit f7bb2d9

Browse files
committed
Add initial repartition script.
Fixes #5
1 parent 89afccf commit f7bb2d9

File tree

4 files changed

+225
-3
lines changed

4 files changed

+225
-3
lines changed

scripts/repartition-index.py

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
import json
2+
import re
3+
import sys
4+
5+
from collections import OrderedDict
6+
from pathlib import Path
7+
from urllib.request import Request, urlopen
8+
9+
REPO = Path(__file__).absolute().parent.parent
10+
sys.path.append(str(REPO / "src"))
11+
12+
from manage.urlutils import IndexDownloader
13+
from manage.tagutils import CompanyTag, tag_or_range
14+
from manage.verutils import Version
15+
16+
17+
def usage():
    """Print the command-line help text to stdout, then exit with status 1.

    This function never returns: it always terminates the process through
    ``sys.exit(1)``.
    """
    help_lines = (
        "Usage: repartition-index.py [-i options <FILENAME> ...] [options <OUTPUT> ...]",
        "",
        " -i <FILENAME> One or more files to read existing entries from.",
        " -i -n/--no-recurse Do not follow 'next' info",
        "",
        " <OUTPUT> Filename to write entries into",
        " -d/--allow-dup Include entries written in previous outputs",
        " --pre Include entries marked as prereleases",
        " -t/--tag TAG Include entries matching the specified tag",
        " -r/--range RANGE Include entries included within the specified range",
        " --latest-micro Include entries that are the latest x.y.z version",
        "",
        "An output of 'nul' is permitted to drop entries.",
        "Providing the same inputs and outputs is permitted, as all inputs are read",
        "before any outputs are written.",
    )
    # One print() per entry; an empty entry produces the same blank line
    # that a bare print() would.
    for line in help_lines:
        print(line)
    sys.exit(1)
34+
35+
36+
class ReadFile:
    """Plan step that loads existing version entries from an index source.

    Attributes are filled in by ``add_arg`` while the command line is parsed:
    ``source`` is the location to read and ``recurse`` controls whether
    anything after the first downloaded index is consumed.
    """

    def __init__(self):
        self.recurse = True
        self.source = None

    def add_arg(self, arg):
        """Consume one command-line argument belonging to this ``-i`` action.

        Returns True when ``arg`` was the source name (the action is then
        complete) and False when it was an option and more arguments are
        expected. Raises ValueError for an unrecognised option.
        """
        if not arg.startswith("-"):
            self.source = arg
            return True
        if arg not in ("-n", "--no-recurse"):
            raise ValueError("Unknown argument: " + arg)
        self.recurse = False
        return False

    def execute(self, versions, context):
        """Append the ``"versions"`` list of each downloaded index to ``versions``.

        ``context`` is accepted for interface parity with the other plan
        steps and is not used here.
        """
        def keep_raw(*parts):
            # Hand back the constructor arguments untouched so the loop below
            # receives the raw parsed data rather than a wrapper object.
            return parts

        for _url, payload in IndexDownloader(self.source, keep_raw):
            versions.extend(payload["versions"])
            if not self.recurse:
                # Only the first index was requested; ignore later ones.
                break
55+
56+
57+
class SortVersions:
    """Plan step that sorts the collected version entries in place."""

    def __init__(self):
        pass

    def add_arg(self, arg):
        """Reject ``arg``: this step currently accepts no options.

        Always raises ValueError.
        """
        raise ValueError("Unknown argument: " + arg)

    def _number_sortkey(self, k):
        """Return a tuple key for ``k`` in which digit runs compare numerically.

        ``k`` is split into alternating non-digit and digit runs; each digit
        run is rewritten as a 20-character zero-padded number so that plain
        string comparison orders e.g. "9" before "10".
        """
        # str.isdecimal() is true exactly for non-empty runs of the decimal
        # digits that r"\d+" captures, so it selects the same pieces that
        # int() would accept.
        return tuple(
            f"{int(piece):020}" if piece.isdecimal() else piece
            for piece in re.split(r"(\d+)", k)
        )

    def _sort_key(self, v):
        """Build the composite sort key for one version entry ``v``.

        The key orders by the entry's "sort-version", then "company", then a
        natural ordering of "id".
        """
        # Imported here because these are private helpers of manage.tagutils.
        from manage.tagutils import _CompanyKey, _DescendingVersion

        version_key = _DescendingVersion(v["sort-version"])
        company_key = _CompanyKey(v["company"])
        return version_key, company_key, self._number_sortkey(v["id"])

    def execute(self, versions, context):
        """Sort ``versions`` in place; ``context`` is unused."""
        versions.sort(key=self._sort_key)
83+
84+
85+
class SplitToFile:
    """Plan step that selects version entries for one output file.

    Options are collected through ``add_arg`` until the output filename is
    seen. ``execute`` then copies every matching entry into the list kept for
    that filename in the shared context.
    """

    def __init__(self):
        self.target = None
        self.allow_dup = False
        self.pre = False
        self.tag_or_range = None
        # True while the value for a preceding -t/-r option is still pending.
        self._expect_tag_or_range = False
        self.latest_micro = False

    def add_arg(self, arg):
        """Consume one command-line argument belonging to this output.

        Returns True when ``arg`` was the output filename (the action is then
        complete) and False when it was an option or an option value.

        Raises ValueError for an unrecognised option, or when another option
        appears where the value of -t/--tag or -r/--range was expected.
        """
        if arg[:1] != "-":
            if self._expect_tag_or_range:
                self.tag_or_range = tag_or_range(arg)
                self._expect_tag_or_range = False
                return False
            self.target = arg
            return True
        if self._expect_tag_or_range:
            # Without this check, "-t --pre out.json" would silently treat
            # the output filename as the tag and leave no target at all.
            raise ValueError("Expected a tag or range before: " + arg)
        if arg in ("-d", "--allow-dup"):
            self.allow_dup = True
            return False
        if arg == "--pre":
            self.pre = True
            return False
        if arg in ("-t", "--tag", "-r", "--range"):
            self._expect_tag_or_range = True
            return False
        if arg == "--latest-micro":
            self.latest_micro = True
            return False
        raise ValueError("Unknown argument: " + arg)

    def execute(self, versions, context):
        """Append the entries of ``versions`` that pass the filters to this output.

        ``context`` carries state shared between steps:

        * ``"written"``: set of (id, sort-version) keys already emitted;
        * ``"outputs"``: mapping of target filename to its list of entries;
        * ``"output_order"``: target filenames in order of first use.

        A target of ``"nul"`` collects into a throwaway list, so matching
        entries are marked as written but never reach a file.
        """
        written = context.setdefault("written", set())
        outputs = context.setdefault("outputs", {})
        if self.target != "nul":
            try:
                output = outputs[self.target]
            except KeyError:
                # First use of this target: remember where it falls in the
                # output sequence.
                context.setdefault("output_order", []).append(self.target)
                output = outputs.setdefault(self.target, [])
        else:
            # Write to a list that'll be forgotten
            output = []

        # Keys already kept by --latest-micro during this call.
        latest_micro_skip = set()

        for i in versions:
            k = i["id"].casefold(), i["sort-version"].casefold()
            v = Version(i["sort-version"])
            if not self.allow_dup and k in written:
                continue
            if not self.pre and v.is_prerelease:
                continue
            if self.tag_or_range and not any(
                self.tag_or_range.satisfied_by(CompanyTag(i["company"], t))
                for t in i["install-for"]
            ):
                continue
            if self.latest_micro:
                # Only the first entry seen for each key is kept.
                # NOTE(review): this presumably relies on ``versions`` having
                # been sorted newest-first by an earlier step - confirm.
                k2 = i["id"].casefold(), v.to_python_style(2, with_dev=False)
                if k2 in latest_micro_skip:
                    continue
                latest_micro_skip.add(k2)
            output.append(i)
            written.add(k)
150+
151+
152+
class WriteFiles:
    """Plan step that serialises every collected output list to a JSON file."""

    def __init__(self):
        # Passed straight to json.dump(); None selects its compact layout.
        self.indent = None

    def add_arg(self, arg):
        """Apply one ``-w-*`` option.

        Always returns False. Raises ValueError for an unrecognised option.
        """
        if arg == "-w-indent":
            self.indent = 4
        elif arg == "-w-indent1":
            self.indent = 1
        else:
            raise ValueError("Unknown argument: " + arg)
        return False

    def execute(self, versions, context):
        """Write each target named in ``context["output_order"]`` to disk.

        Each file holds a JSON object with the target's entries under
        "versions". Every file except the last also gets a "next" key naming
        the target that follows it. ``versions`` is not used.
        """
        outputs = context.get("outputs") or {}
        output_order = context.get("output_order", [])
        last_index = len(output_order) - 1
        for position, target in enumerate(output_order):
            following = output_order[position + 1] if position < last_index else None
            data = {"versions": outputs[target]}
            if following:
                data["next"] = following
            with open(target, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=self.indent)
176+
177+
178+
def parse_cli(args):
    """Turn the command-line arguments into an ordered list of plan steps.

    ``-i`` starts a ``ReadFile`` action. Arguments beginning with ``-s-`` or
    ``-w-`` go to the single ``SortVersions`` / ``WriteFiles`` step. Every
    other argument is passed to the action being built; a new ``SplitToFile``
    is started when none is pending. An action is complete once its
    ``add_arg`` returns True.

    Returns ``[*reads, sort, *splits, write]``, so that all inputs are read
    and sorted before any output is selected or written.

    On any invalid or incomplete command line the reason is printed to
    stderr and ``usage()`` is called, which exits the process.
    """
    plan_read = []
    plan_split = []
    sort = SortVersions()
    write = WriteFiles()
    action = None
    try:
        for a in args:
            if a == "-i":
                if action is not None:
                    # The previous action never received its filename.
                    raise ValueError("Missing filename before: " + a)
                action = ReadFile()
                plan_read.append(action)
            elif a.startswith("-s-"):
                sort.add_arg(a)
            elif a.startswith("-w-"):
                write.add_arg(a)
            else:
                if action is None:
                    action = SplitToFile()
                    plan_split.append(action)
                if action.add_arg(a):
                    action = None
        if action is not None:
            # Trailing "-i" or trailing options without a filename would
            # otherwise leave a step whose source/target is None.
            raise ValueError("Missing filename at end of arguments")
    except ValueError as ex:
        print(ex, file=sys.stderr)
        usage()
    return [*plan_read, sort, *plan_split, write]
204+
205+
206+
if __name__ == "__main__":
    # State shared by every plan step: the accumulated version entries and a
    # free-form dict the steps use to pass results to one another.
    VERSIONS = []
    CONTEXT = {}
    # Steps run strictly in the order produced by parse_cli().
    for step in parse_cli(sys.argv[1:]):
        step.execute(VERSIONS, CONTEXT)
212+

src/manage/tagutils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ def startswith(self, other):
2626
return self._company.startswith(other._company)
2727
return self._company == other._company
2828

29+
def __hash__(self):
30+
return hash(self._company)
31+
2932
def __eq__(self, other):
3033
return self._company == other._company
3134

@@ -64,6 +67,9 @@ def startswith(self, other):
6467
return not self.s
6568
return self.s.startswith(other.s)
6669

70+
def __hash__(self):
71+
return hash(self.s)
72+
6773
def __eq__(self, other):
6874
if not isinstance(other, type(self)):
6975
return False

src/manage/urlutils.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -677,10 +677,11 @@ def __next__(self):
677677
LOGGER.error("An unexpected error occurred while downloading the index: %s", ex)
678678
raise
679679

680-
index = self.index_cls(self._url, json.loads(data))
680+
j = json.loads(data)
681+
index = self.index_cls(self._url, j)
681682

682-
if index.next_url:
683-
self._url = urljoin(url, index.next_url, to_parent=True)
683+
if j.get("next"):
684+
self._url = urljoin(url, j["next"], to_parent=True)
684685
else:
685686
self._url = None
686687
return index

src/manage/verutils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def __str__(self):
5353
def __repr__(self):
5454
return self.s
5555

56+
def __hash__(self):
57+
return hash(self.sortkey)
58+
5659
def _are_equal(self, other, prefix_match=None, other_prefix_match=None, prerelease_match=None):
5760
if other is None:
5861
return False

0 commit comments

Comments
 (0)