-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
116 lines (92 loc) · 3.61 KB
/
utils.py
File metadata and controls
116 lines (92 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import json
import ujson
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
import random
from concurrent.futures import ThreadPoolExecutor
def read_jsonl(jsonl_path):
    """Load the ``__dj__stats__`` values of every JSONL record into a DataFrame.

    Each line must be a JSON object containing a ``"__dj__stats__"`` mapping.
    The values of that mapping form one row, in the mapping's own order; a
    value that is a list is reduced to its first element.

    Args:
        jsonl_path: Path of the JSONL file to read.

    Returns:
        A ``pandas.DataFrame`` built from the list of rows, one row per line.
    """
    res_array = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f):
            stats = json.loads(line.strip())["__dj__stats__"]
            res_array.append(
                [v[0] if isinstance(v, list) else v for v in stats.values()]
            )
    return pd.DataFrame(res_array)
def read_jsonl_i(jsonl_path):
    """Load ``__dj__stats__`` values into a DataFrame, parsing with ``ujson``.

    The whole file is read first so the progress bar knows the total line
    count. Each line must be a JSON object containing a ``"__dj__stats__"``
    mapping; its values form one row, and a value that is a list is reduced
    to its first element.

    Args:
        jsonl_path: Path of the JSONL file to read.

    Returns:
        A ``pandas.DataFrame`` built from the list of rows, one row per line.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    res_array = []
    for line in tqdm(lines, desc="Processing", unit=" lines"):
        stats = ujson.loads(line.strip())["__dj__stats__"]
        res_array.append(
            [v[0] if isinstance(v, list) else v for v in stats.values()]
        )
    return pd.DataFrame(res_array)
def read_jsonl_vhclip(jsonl_path):
    """Load the top-level values of every JSONL record into a DataFrame.

    Unlike ``read_jsonl``-style loaders that look under ``"__dj__stats__"``,
    this one takes the values of the record object itself. A value that is a
    list is reduced to its first element.

    Args:
        jsonl_path: Path of the JSONL file to read.

    Returns:
        A ``pandas.DataFrame`` built from the list of rows, one row per line.
    """
    res_array = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f):
            record = json.loads(line.strip())
            res_array.append(
                [v[0] if isinstance(v, list) else v for v in record.values()]
            )
    return pd.DataFrame(res_array)
def read_jsonl_gdino(jsonl_path):
    """Load the ``grounding_dino_detection`` score lists of a JSONL file.

    For every line, the ``"grounding_dino_detection"`` entry is looked up
    under ``"__dj__stats__"``. The row holds that entry's ``"scores"`` list
    as a single cell. When the entry is missing, its ``"scores"`` is
    ``None``, or ``"num_detections"`` is 0, the cell is ``[0.0]`` instead.

    Args:
        jsonl_path: Path of the JSONL file to read.

    Returns:
        A ``pandas.DataFrame`` with one row per line and a single column
        whose cells are lists of scores.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    res_array = []
    for line in tqdm(lines, desc="Processing", unit=" lines"):
        stats = ujson.loads(line.strip())["__dj__stats__"]
        detection = stats.get("grounding_dino_detection")
        # `.get` yields None for records without a detection entry; treat
        # those like "no detections" instead of subscripting None.
        if (
            detection is None
            or detection["scores"] is None
            or detection["num_detections"] == 0
        ):
            res_array.append([[0.0]])
        else:
            res_array.append([detection["scores"]])
    return pd.DataFrame(res_array)
def read_jsonl_icc(jsonl_path):
    """Load the ``"score"`` field of every JSONL record into a DataFrame.

    Args:
        jsonl_path: Path of the JSONL file to read; each line must be a JSON
            object with a ``"score"`` key.

    Returns:
        A ``pandas.DataFrame`` built from the list of scores, one per line.

    Raises:
        KeyError: If a record has no ``"score"`` key.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        res_array = [json.loads(line.strip())["score"] for line in tqdm(f)]
    return pd.DataFrame(res_array)
def process_line(args):
    """Re-serialize one JSONL line if its position is selected.

    Args:
        args: A ``(line, i, index_set)`` tuple: the raw JSON text, the
            line's position, and the collection of selected positions.

    Returns:
        ``json.dumps`` of the parsed line when ``i`` is in ``index_set``,
        otherwise ``None``.
    """
    raw, position, wanted = args
    if position not in wanted:
        return None
    return json.dumps(json.loads(raw))
def copy_selected_data(jsonlin, jsonlout, index_list):
    """Copy the JSONL records at the selected line positions to a new file.

    Lines are numbered from 0 in file order. Each selected line is parsed
    and re-serialized by ``process_line``; output keeps the input order,
    one record per line.

    Args:
        jsonlin: Path of the source JSONL file.
        jsonlout: Path of the JSONL file to write (overwritten).
        index_list: Iterable of 0-based line positions to keep.
    """
    index_set = set(index_list)
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    # The input is read completely before the output is opened, so passing
    # the same path for both remains safe.
    with open(jsonlin, 'r', encoding='utf-8') as infile:
        lines = infile.readlines()

    # Parsing JSON is CPU-bound, so a thread pool adds per-line task
    # overhead without parallelism; a plain loop yields the same results.
    results = [
        process_line((line, i, index_set))
        for i, line in enumerate(tqdm(lines))
    ]

    with open(jsonlout, 'w', encoding='utf-8') as outfile:
        outfile.writelines(
            result + '\n' for result in results if result is not None
        )
def process_line_withscore(args):
    """Re-serialize one selected JSONL line with its score attached.

    Args:
        args: A ``(line, i, index_set, search_dict)`` tuple: the raw JSON
            text, the line's position, the collection of selected
            positions, and a mapping from position to score.

    Returns:
        ``json.dumps`` of the parsed line with ``"score"`` set to
        ``search_dict[i]`` when ``i`` is in ``index_set``, otherwise
        ``None``.

    Raises:
        KeyError: If ``i`` is selected but has no entry in ``search_dict``.
    """
    line, i, index_set, search_dict = args
    if i not in index_set:
        return None

    score = search_dict[i]
    # NumPy scalars (np.float32, np.int64, ...) are not JSON serializable;
    # convert them to the equivalent built-in Python number first.
    if isinstance(score, np.generic):
        score = score.item()

    data = json.loads(line)
    data["score"] = score
    return json.dumps(data)
def copy_selected_data_withscore(jsonlin, jsonlout, index_list, score_list):
    """Copy selected JSONL records to a new file, adding a ``"score"`` field.

    Lines are numbered from 0 in file order. ``index_list`` and
    ``score_list`` are paired positionally with ``zip``, so entries beyond
    the shorter of the two are not paired. Each selected line is handled by
    ``process_line_withscore``; output keeps the input order, one record
    per line.

    Args:
        jsonlin: Path of the source JSONL file.
        jsonlout: Path of the JSONL file to write (overwritten).
        index_list: 0-based line positions to keep.
        score_list: Scores matching ``index_list`` element by element.
    """
    index_set = set(index_list)
    search_dict = dict(zip(index_list, score_list))
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    # The input is read completely before the output is opened, so passing
    # the same path for both remains safe.
    with open(jsonlin, 'r', encoding='utf-8') as infile:
        lines = infile.readlines()

    # Parsing JSON is CPU-bound, so a thread pool adds per-line task
    # overhead without parallelism; a plain loop yields the same results.
    results = [
        process_line_withscore((line, i, index_set, search_dict))
        for i, line in enumerate(tqdm(lines))
    ]

    with open(jsonlout, 'w', encoding='utf-8') as outfile:
        outfile.writelines(
            result + '\n' for result in results if result is not None
        )
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also seeds the CUDA generators when CUDA is available and configures
    cuDNN for deterministic behaviour with benchmarking turned off.

    Args:
        seed: Seed value handed to every generator.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False