-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathia.py
More file actions
97 lines (76 loc) · 3.09 KB
/
ia.py
File metadata and controls
97 lines (76 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import yaml
import csv # ✅ Import CSV module to properly parse quoted values
from src.config import ensure_config, PROJECT_ROOT
from src.upload import upload_files
from src.update import modify_item_metadata
from src.log import log_action
def _read_csv_rows(path):
    """Return every row of the UTF-8 CSV file at *path* as a list of lists.

    ``csv.reader`` is used so that quoted fields (which may contain commas
    or newlines) are parsed correctly. No header row is skipped.
    """
    with open(path, newline="", encoding="utf-8") as handle:
        return list(csv.reader(handle))


def ia():
    """Upload every item's files and then update that item's metadata.

    Steps, in order:
      1. ``ensure_config()`` is called.
      2. ``settings.yml`` is read from the current working directory.
      3. The ``assets`` directory under ``PROJECT_ROOT/<source dir>`` must
         exist, otherwise the process exits with status 1.
      4. ``codebook.csv``, ``items.csv``, ``files.csv`` and ``metadata.csv``
         are loaded from ``PROJECT_ROOT/<source dir>``.
      5. For each non-empty row of ``items.csv``, ``upload_files`` and then
         ``modify_item_metadata`` are called.

    Column layout as consumed here:
      * items.csv:    id, title, creator
      * files.csv:    item id, file
      * metadata.csv: item id, codes

    Rows are processed exactly as read; no header row is skipped.

    Raises:
        SystemExit: with status 1 when the assets directory is missing.
    """
    # Ensure the Internet Archive configuration exists
    ensure_config()

    # Load settings
    with open("settings.yml", "r", encoding="utf-8") as settings_file:
        settings = yaml.safe_load(settings_file)

    ia_settings = settings["ia"]
    prefix = ia_settings["item_prefix"]
    separator = ia_settings["item_separator"]
    # Same string conversion the title fallback has always applied.
    untitled = str(ia_settings["item_untitled"])
    source = settings["source"]["dir"]
    # NOTE(review): the "item_suffix" setting is not applied to identifiers
    # anywhere in this function - confirm whether it should be.

    source_dir = os.path.join(PROJECT_ROOT, source)

    # The assets directory is only checked for existence here.
    assets_dir = os.path.join(source_dir, "assets")
    if not os.path.exists(assets_dir):
        print(f"❌ Error: Directory '{assets_dir}' not found!")
        # SystemExit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(1)

    # Load the four CSV inputs
    codebook = _read_csv_rows(os.path.join(source_dir, "codebook.csv"))
    items = _read_csv_rows(os.path.join(source_dir, "items.csv"))
    files = _read_csv_rows(os.path.join(source_dir, "files.csv"))
    metadata = _read_csv_rows(os.path.join(source_dir, "metadata.csv"))

    # Index files.csv once: item id -> files in file order. Rows without
    # both columns cannot name a file and are skipped.
    files_by_item = {}
    for row in files:
        if len(row) >= 2:
            files_by_item.setdefault(row[0], []).append(row[1])

    # Index metadata.csv once: item id -> codes cell. setdefault keeps the
    # first row seen for an id, so the first match wins.
    codes_by_item = {}
    for row in metadata:
        if len(row) >= 2:
            codes_by_item.setdefault(row[0], row[1])

    for item in items:
        if not item:  # Skip blank rows
            continue

        # Pad short rows so a missing title/creator column reads as "".
        item_id, title, creator = (item + ["", ""])[:3]

        # Non-empty id -> "<prefix><separator><id>"; empty id -> "".
        # NOTE(review): a row with an empty id is still passed on with an
        # empty identifier - confirm the upload/update helpers expect that.
        identifier = separator.join([prefix, item_id]) if item_id else ""
        log_action(f"Processing: {item_id}", "info")

        files_to_upload = files_by_item.get(item_id, [])

        core_metadata = {
            "title": title if title else untitled,
            "creator": creator,
        }

        # The codes cell is passed through as the raw CSV string ([] when
        # the item has no metadata row); it is not split here.
        codes = codes_by_item.get(item_id, [])

        # Upload files, then update metadata
        upload_files(identifier, files_to_upload)
        modify_item_metadata(identifier, core_metadata, codes, codebook)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    ia()