-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv_to_json.py
More file actions
130 lines (115 loc) · 5.44 KB
/
csv_to_json.py
File metadata and controls
130 lines (115 loc) · 5.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import csv
import json
import sys
def parse_field(val, target_type, null_repr='\\N'):
    """Convert *val* using *target_type*, mapping the null marker to None.

    *null_repr* defaults to the PostgreSQL COPY null representation (``\\N``).
    Raises whatever *target_type* raises on a malformed value.
    """
    return None if val == null_repr else target_type(val)
def parse_cost_field(val):
    """Parse a cost column into a float.

    The ``\\N`` null marker becomes None; 'Infinity'/'Inf' and 'NaN'
    (any case) become the corresponding IEEE-754 specials so they can
    later be rewritten into JSON-compatible strings.
    """
    if val == '\\N':
        return None
    token = val.upper()
    if token in ('INFINITY', 'INF'):
        return float('inf')
    if token == 'NAN':
        return float('nan')
    return float(val)
# Column order must match the tab-separated planner-path dump
# (one row per candidate path, PostgreSQL COPY format).
_COLUMNS = [
    "query_id", "subquery_id", "subquery_level", "rel_id", "path_id",
    "path_type", "child_paths", "startup_cost", "total_cost", "rows",
    "width", "rel_name", "rel_alias", "indexoid", "level",
    "add_path_result", "displaced_by", "cost_cmp", "fuzz_factor", "pathkeys_cmp",
    "bms_cmp", "rows_cmp", "parallel_safe_cmp", "disabled_nodes", "relids"
]

# PostgreSQL COPY representation of NULL.
_NULL = '\\N'


def _clean(val):
    """Return None for the \\N null marker, otherwise the raw string."""
    return None if val == _NULL else val


def _parse_path_entry(record):
    """Build one path dict from a raw column->string record.

    Raises (ValueError/KeyError) on malformed rows; the caller reports
    and skips such rows.
    """
    # level: when NULL, default to 0 instead of dropping the row.
    level_val = record["level"]
    level = int(level_val) if level_val != _NULL else 0
    # subquery_id: default to 1 only when NULL.  An explicit `is None`
    # test is used because the previous `... or 1` idiom would also have
    # clobbered a legitimate parsed value of 0.
    subquery_id = parse_field(record["subquery_id"], int)
    if subquery_id is None:
        subquery_id = 1
    # child_paths arrives as a PostgreSQL array literal, e.g. "{1,2,3}".
    if record["child_paths"] == _NULL:
        child_paths = []
    else:
        child_paths = [
            int(x.strip())
            for x in record["child_paths"].strip("{}").split(",")
            if x.strip()
        ]
    return {
        "query_id": parse_field(record["query_id"], int),
        "subquery_id": subquery_id,
        "subquery_level": parse_field(record["subquery_level"], int),
        "level": level,
        "rel_id": parse_field(record["rel_id"], int),
        "path_id": parse_field(record["path_id"], int),
        "path_type": _clean(record["path_type"]),
        "child_paths": child_paths,
        "rel_name": _clean(record["rel_name"]),
        "rel_alias": _clean(record["rel_alias"]),
        "startup_cost": parse_cost_field(record["startup_cost"]),
        "total_cost": parse_cost_field(record["total_cost"]),
        "rows": parse_field(record["rows"], int),
        "width": parse_field(record["width"], int),
        "indexoid": parse_field(record["indexoid"], int),
        "add_path_result": _clean(record["add_path_result"]),
        "displaced_by": parse_field(record["displaced_by"], int),
        "cost_cmp": _clean(record["cost_cmp"]),
        "fuzz_factor": parse_field(record["fuzz_factor"], float),
        "pathkeys_cmp": _clean(record["pathkeys_cmp"]),
        "bms_cmp": _clean(record["bms_cmp"]),
        "rows_cmp": _clean(record["rows_cmp"]),
        "parallel_safe_cmp": _clean(record["parallel_safe_cmp"]),
        "disabled_nodes": _clean(record["disabled_nodes"]),
        "relids": _clean(record["relids"]),
    }


def _group_paths(paths):
    """Group flat path entries into queries -> subqueries -> relations.

    The relation's name/alias come from the first path seen for that
    rel_id; every path carries its own "level" field, so no per-level
    nesting is needed.
    """
    data = {"queries": {}}
    for p in paths:
        query = data["queries"].setdefault(p["query_id"], {"subqueries": {}})
        subquery = query["subqueries"].setdefault(
            p["subquery_id"], {"relations": {}}
        )
        relation = subquery["relations"].setdefault(p["rel_id"], {
            "name": p["rel_name"],
            "alias": p["rel_alias"],
            "paths": [],
        })
        relation["paths"].append(p)
    return data


def _replace_special_floats(obj):
    """Recursively replace inf/-inf/NaN floats with string placeholders.

    json.dumps would otherwise emit the non-standard tokens Infinity/NaN,
    which strict JSON parsers reject.
    """
    if isinstance(obj, dict):
        return {k: _replace_special_floats(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [_replace_special_floats(v) for v in obj]
    if isinstance(obj, float):
        if obj != obj:  # NaN is the only value not equal to itself
            return "NaN"
        if obj == float('inf'):
            return "Infinity"
        if obj == float('-inf'):
            return "-Infinity"
    return obj


def main(csv_path):
    """Convert a tab-separated planner-path dump into grouped JSON on stdout.

    Reads *csv_path* (tab-delimited, \\N for NULL), groups the paths by
    query -> subquery -> relation, and prints pretty JSON.  Rows that
    fail to parse are reported on stderr and skipped (best effort: one
    malformed row must not abort the whole conversion).
    """
    paths = []
    with open(csv_path, newline='') as f:
        reader = csv.reader(f, delimiter='\t')
        for row in reader:
            record = dict(zip(_COLUMNS, row))
            try:
                paths.append(_parse_path_entry(record))
            except Exception as e:
                print(f"Пропущена строка: {row} ({e})", file=sys.stderr)
                continue
    data = _group_paths(paths)
    print(json.dumps(_replace_special_floats(data), indent=2))
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Использование: python3 csv_to_json.py <file.csv>", file=sys.stderr)
sys.exit(1)
main(sys.argv[1])