|
4 | 4 | import joblib |
5 | 5 | from scipy.sparse import save_npz, load_npz |
6 | 6 |
|
| 7 | + |
def get_project_root():
    """Return the absolute path of the project root directory.

    The root is located relative to this source file: the file's containing
    directory, then two further parent directories up. Because it is derived
    from ``__file__``, the result does not depend on the current working
    directory.
    """
    here = os.path.abspath(__file__)
    # Three dirname() calls: containing dir -> its parent -> its grandparent.
    return os.path.dirname(os.path.dirname(os.path.dirname(here)))
| 10 | + |
| 11 | + |
def get_processed_path(file_name, is_json=False):
    """Build the path of a file inside ``<project root>/data/processed``.

    Args:
        file_name: File name, with or without its extension.
        is_json: When true the expected extension is ``.json``; otherwise
            it is ``.gz``.

    Returns:
        The joined path. The expected extension is appended only when
        ``file_name`` does not already end with it.
    """
    ext = '.json' if is_json else '.gz'
    if not file_name.endswith(ext):
        file_name += ext
    return os.path.join(get_project_root(), "data", "processed", file_name)
| 17 | + |
| 18 | + |
def export_data_to_json(data, file_name, is_json=False):
    """Write ``data`` as JSON into the processed-data directory.

    Args:
        data: Any object ``json.dump`` can serialize.
        file_name: Target file name; the matching extension is added by
            ``get_processed_path`` when missing.
        is_json: When true, write a plain-text ``.json`` file; otherwise
            write gzip-compressed JSON (``.gz``).
    """
    path = get_processed_path(file_name, is_json)
    # Create the target directory on demand so the first export on a fresh
    # checkout does not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Both formats share the same serialization; only the opener differs.
    opener = open if is_json else gzip.open
    mode = 'w' if is_json else 'wt'
    with opener(path, mode, encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
21 | | - |
22 | 28 |
|
def import_processed_json(file_name, is_json=False):
    """Load JSON data from the processed-data directory.

    Args:
        file_name: File name; the matching extension is added by
            ``get_processed_path`` when missing.
        is_json: When true, read a plain-text ``.json`` file; otherwise
            read gzip-compressed JSON (``.gz``).

    Returns:
        The decoded JSON content, or ``None`` when the file does not exist.
    """
    path = get_processed_path(file_name, is_json)
    opener = open if is_json else gzip.open
    mode = 'r' if is_json else 'rt'
    # Attempt the open directly instead of checking existence first: this
    # avoids a race where the file disappears between check and open.
    try:
        with opener(path, mode, encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return None
45 | 39 |
|
def export_models(model, file_name):
    """Persist ``model`` with joblib under ``<project root>/models``.

    Args:
        model: Object to serialize with ``joblib.dump``.
        file_name: File name inside the ``models`` directory.
    """
    models_dir = os.path.join(get_project_root(), "models")
    # Create the directory on demand, mirroring export_processed_data.
    os.makedirs(models_dir, exist_ok=True)
    joblib.dump(model, os.path.join(models_dir, file_name))
54 | 44 |
|
55 | 45 |
|
def import_models(file_name):
    """Load a joblib-persisted object from ``<project root>/models``.

    Args:
        file_name: File name inside the ``models`` directory.

    Returns:
        The deserialized object, or ``None`` when the file does not exist.
    """
    path = os.path.join(get_project_root(), "models", file_name)
    if not os.path.exists(path):
        return None
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load model files from trusted sources.
    return joblib.load(path)
67 | 51 |
|
68 | 52 |
|
def export_processed_data(matrix, filename):
    """Save a sparse matrix as ``.npz`` in ``<project root>/data/processed``.

    Args:
        matrix: Sparse matrix accepted by ``scipy.sparse.save_npz``.
        filename: Target file name inside the processed-data directory.
            NOTE: ``save_npz`` appends ``.npz`` when the name lacks it, so
            pass the full ``.npz`` name to keep the path predictable.
    """
    processed_dir = os.path.join(get_project_root(), "data", "processed")
    # Create the output directory on first use.
    os.makedirs(processed_dir, exist_ok=True)
    save_npz(os.path.join(processed_dir, filename), matrix)
78 | 58 |
|
79 | 59 |
|
80 | 60 | def import_processed_data(filename): |
81 | | - project_root = os.path.dirname( |
82 | | - os.path.dirname(os.path.dirname(os.path.abspath(__file__))) |
83 | | - ) |
84 | | - processed_dir = os.path.join(project_root, "data", "processed") |
| 61 | + processed_dir = os.path.join(get_project_root(), "data", "processed") |
85 | 62 | path = os.path.join(processed_dir, filename) |
86 | 63 | if not os.path.exists(path): |
87 | 64 | return None |
|
0 commit comments