Skip to content

Commit 62d04c4

Browse files
authored
Merge pull request #14 from padovan/standardize-trees-name
ingester: standardize tree names across origins
2 parents 15f1df4 + b0bcb8c commit 62d04c4

File tree

3 files changed

+182
-2
lines changed

3 files changed

+182
-2
lines changed

Dockerfile.ingester

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ RUN git clone https://github.com/kernelci/kcidb.git && \
99
pip install -e .
1010

1111
COPY ingester/ingester.py /app/ingester.py
12+
COPY data/trees.yml /app/trees.yml
1213

1314
CMD ["python", "/app/ingester.py", "--spool-dir", "/app/spool"]
1415

data/trees.yml

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
trees:
2+
aaptel:
3+
url: "https://github.com/aaptel/linux.git"
4+
5+
amlogic:
6+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/amlogic/linux.git"
7+
8+
android:
9+
url: 'https://android.googlesource.com/kernel/common'
10+
11+
ardb:
12+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git"
13+
14+
arm64:
15+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git"
16+
17+
arnd:
18+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground.git"
19+
20+
broonie-misc:
21+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git"
22+
23+
broonie-regmap:
24+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git"
25+
26+
broonie-regulator:
27+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git"
28+
29+
broonie-sound:
30+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git"
31+
32+
broonie-spi:
33+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git"
34+
35+
chrome-platform:
36+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git"
37+
38+
chromiumos:
39+
url: "https://chromium.googlesource.com/chromiumos/third_party/kernel.git"
40+
41+
cip:
42+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/cip/linux-cip.git"
43+
44+
clk:
45+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git"
46+
47+
collabora-chromeos-kernel:
48+
url: 'https://gitlab.collabora.com/google/chromeos-kernel.git'
49+
50+
efi:
51+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git"
52+
53+
hyperv:
54+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git"
55+
56+
kernelci:
57+
url: "https://github.com/kernelci/linux.git"
58+
59+
khilman:
60+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/khilman/linux.git"
61+
62+
krzysztof:
63+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git"
64+
65+
kselftest:
66+
url: 'https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git'
67+
68+
lee-backlight:
69+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/lee/backlight.git"
70+
71+
lee-mfd:
72+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git"
73+
74+
linusw:
75+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git"
76+
77+
linux-pci:
78+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git"
79+
80+
mainline:
81+
url: 'https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git'
82+
83+
media-committers:
84+
url: 'https://gitlab.freedesktop.org/linux-media/media-committers.git'
85+
86+
media:
87+
url: 'https://git.linuxtv.org/media.git'
88+
89+
mediatek:
90+
url: 'https://git.kernel.org/pub/scm/linux/kernel/git/mediatek/linux.git'
91+
92+
net-next:
93+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git"
94+
95+
netdev-testing:
96+
url: "https://github.com/linux-netdev/testing.git"
97+
98+
next:
99+
url: 'https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git'
100+
101+
omap:
102+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/khilman/linux-omap.git"
103+
104+
pm:
105+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git"
106+
107+
qcom:
108+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git"
109+
110+
renesas:
111+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git"
112+
113+
riscv:
114+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git"
115+
116+
robh:
117+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git"
118+
119+
rppt:
120+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git"
121+
122+
sashal-next:
123+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/sashal/linus-next.git"
124+
125+
soc:
126+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git"
127+
128+
stable-rc:
129+
url: 'https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git'
130+
131+
stable-rt:
132+
url: 'https://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git'
133+
134+
stable:
135+
url: 'https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git'
136+
137+
tegra:
138+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux.git"
139+
140+
thermal:
141+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/thermal/linux.git"
142+
143+
tip:
144+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git"
145+
146+
ulfh:
147+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git"
148+
149+
vireshk:
150+
url: "https://git.kernel.org/pub/scm/linux/kernel/git/vireshk/linux.git"

ingester/ingester.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import json
2323
import time
2424
import logging
25+
import yaml
2526

2627
# default database
2728
DATABASE = "postgresql:dbname=kcidb user=kcidb password=kcidb host=localhost port=5432"
@@ -60,8 +61,34 @@ def move_file_to_failed_dir(filename, failed_dir):
6061
print(f"Error moving file {filename} to failed directory: {e}")
6162
raise e
6263

64+
# Location of the tree-name mapping inside the ingester container image;
# Dockerfile.ingester copies data/trees.yml to this path.
TREES_FILE = "/app/trees.yml"
6365

64-
def ingest_submissions(spool_dir, db_client=None):
66+
def load_trees_name(trees_file=None):
    """Load the repository-URL -> tree-name mapping from the trees YAML file.

    The file is expected to contain a top-level ``trees`` mapping whose keys
    are tree names and whose values are mappings with a ``url`` entry.

    Args:
        trees_file: Path of the YAML file to read. Defaults to ``TREES_FILE``.

    Returns:
        A dict mapping each repository URL to its tree name. The dict is
        empty when the file is empty or has no usable ``trees`` section.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    if trees_file is None:
        trees_file = TREES_FILE

    with open(trees_file, "r", encoding="utf-8") as f:
        # safe_load() returns None for an empty document.
        data = yaml.safe_load(f)

    trees = data.get("trees") if isinstance(data, dict) else None
    if not isinstance(trees, dict):
        logging.getLogger(__name__).warning(
            "No 'trees' mapping found in %s", trees_file
        )
        return {}

    trees_name = {}
    for tree_name, entry in trees.items():
        url = entry.get("url") if isinstance(entry, dict) else None
        if not isinstance(url, str) or not url:
            # A tree without a URL cannot be matched against checkouts;
            # skip it instead of aborting the whole ingester at start-up.
            logging.getLogger(__name__).warning(
                "Skipping tree %r in %s: missing or invalid 'url'",
                tree_name,
                trees_file,
            )
            continue
        trees_name[url] = tree_name

    return trees_name
76+
77+
78+
def standardize_trees_name(input_data, trees_name):
    """Rewrite checkout tree names to the canonical name for their git URL.

    For every checkout whose ``git_repository_url`` is a key of
    ``trees_name``, ``tree_name`` is set to the mapped value. Checkouts with
    an unknown or absent URL are left untouched.

    The caller in this file invokes this function before schema validation,
    so the payload may be malformed. Anything that does not have the expected
    shape is passed through unchanged so that the validator, not this helper,
    reports the problem.

    Args:
        input_data: Decoded submission; expected to be a dict that may hold a
            ``checkouts`` list of dicts. Modified in place.
        trees_name: Mapping of repository URL to canonical tree name.

    Returns:
        The same ``input_data`` object that was passed in.
    """
    if not isinstance(input_data, dict):
        return input_data

    checkouts = input_data.get("checkouts")
    if not isinstance(checkouts, list):
        # Missing, null, or wrongly typed: nothing to standardize.
        return input_data

    for checkout in checkouts:
        if not isinstance(checkout, dict):
            continue
        git_url = checkout.get("git_repository_url")
        # Guard against non-string (possibly unhashable) values before the
        # dictionary lookup.
        if not isinstance(git_url, str):
            continue
        if git_url in trees_name:
            checkout["tree_name"] = trees_name[git_url]

    return input_data
89+
90+
91+
def ingest_submissions(spool_dir, trees_name, db_client=None):
6592
failed_dir = os.path.join(spool_dir, "failed")
6693
archive_dir = os.path.join(spool_dir, "archive")
6794
if db_client is None:
@@ -89,6 +116,7 @@ def ingest_submissions(spool_dir, db_client=None):
89116
logger.info(f"File size: {fsize}")
90117
try:
91118
data = json.loads(f.read())
119+
data = standardize_trees_name(data, trees_name)
92120
data = io_schema.validate(data)
93121
data = io_schema.upgrade(data, copy=False)
94122
db_client.load(data)
@@ -147,10 +175,11 @@ def main():
147175
args = parser.parse_args()
148176
logger.info("Starting ingestion process...")
149177
verify_spool_dirs(args.spool_dir)
178+
trees_name = load_trees_name()
150179
get_db_credentials()
151180
db_client = get_db_client(DATABASE)
152181
while True:
153-
ingest_submissions(args.spool_dir, db_client)
182+
ingest_submissions(args.spool_dir, trees_name, db_client)
154183
time.sleep(1)
155184

156185
if __name__ == "__main__":

0 commit comments

Comments
 (0)