Skip to content

Commit a5848a9

Browse files
committed
added directchat to code tagger
1 parent e46ad65 commit a5848a9

File tree

1 file changed

+16
-7
lines changed

1 file changed

+16
-7
lines changed

fastchat/serve/monitor/code_tagger.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import nltk
77
from tqdm import tqdm
88
from nltk.tokenize import word_tokenize
9-
9+
import pandas as pd
1010

1111
def is_code_conversation(text: str) -> tuple[bool, list[str]]:
1212
"""Check if the text is a code conversation"""
@@ -132,18 +132,26 @@ def check_code_conv(conv) -> tuple[bool, list[str]]:
132132

133133

134134
def check_conv_row(conv_row):
    """Tag one conversation row as code-related or not.

    Two row layouts are supported, selected by which keys are present:

    * battle rows, holding two conversations under ``"conversation_a"`` and
      ``"conversation_b"``;
    * single-conversation rows, holding one conversation under
      ``"conversation"`` (presumably the direct-chat export format -- confirm
      against the data producer).

    Args:
        conv_row: Mapping for one record of the input file.

    Returns:
        A ``(is_code, code_types)`` tuple. For battle rows ``is_code`` is true
        when either side is flagged and ``code_types`` is side A's list
        followed by side B's. For single-conversation rows it is the result
        of ``check_code_conv`` on that conversation.

    Raises:
        KeyError: If the row has neither layout's keys.
    """
    # Battle layout is checked first so existing battle files are tagged
    # exactly as before.
    if "conversation_a" in conv_row:
        check_a, code_a = check_code_conv(conv_row["conversation_a"])
        check_b, code_b = check_code_conv(conv_row["conversation_b"])
        return check_a or check_b, code_a + code_b

    # Single-conversation layout: there is only one side to inspect.
    is_code, code_types = check_code_conv(conv_row["conversation"])
    return is_code, code_types
139140

140-
141-
def process_battle_file(battle_file_path: str, n_cpus: int):
142-
with open(battle_file_path, "r") as f:
143-
data = json.load(f)
141+
def process_battle_file(battle_file_path: str, n_cpus: int, direct_chat: bool = False):
142+
# with open(battle_file_path, "r") as f:
143+
# data = json.load(f)
144+
# data = pd.read_json(battle_file_path).to_dict("records")
145+
data = pd.read_json(battle_file_path, lines=True).to_dict("records")
146+
print(data[0])
144147

145148
with mp.Pool(n_cpus) as pool:
146-
tagged_data = list(tqdm(pool.imap(check_conv_row, data), total=len(data)))
149+
tagged_data = list(
150+
tqdm(
151+
pool.imap(check_conv_row, data),
152+
total=len(data),
153+
)
154+
)
147155

148156
output_data = [row for row, (is_code, _) in zip(data, tagged_data) if is_code]
149157

@@ -154,6 +162,7 @@ def process_battle_file(battle_file_path: str, n_cpus: int):
154162
parser = argparse.ArgumentParser()
155163
parser.add_argument("--clean-battle-file", type=str)
156164
parser.add_argument("--output-clean-battle-file", type=str, default=None)
165+
parser.add_argument("--direct-chat", action="store_true")
157166
parser.add_argument("--n-cpus", type=int, default=-1)
158167

159168
args = parser.parse_args()

0 commit comments

Comments
 (0)