# LLM-ASPD/misc.py (TencentYoutuResearch/LLM-ASPD, main branch)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import logging
import json
import os
import xlsxwriter
import torch
import tqdm

def read2jsonline(filename: str):
    """
    Load every record from a JSON Lines (.jsonl) file.

    Lines that are empty or contain only whitespace are skipped; each
    remaining line is decoded as one JSON document.

    Args:
        filename (str): The path to the .jsonl file, read as UTF-8.

    Returns:
        list: The decoded JSON values, one per non-blank line, in file order.
    """
    # Read the whole file first, then decode, so the handle is released
    # before any JSON parsing happens.
    with open(filename, encoding='utf-8') as handle:
        raw_lines = list(handle)

    records = []
    for raw in raw_lines:
        if raw.strip():
            records.append(json.loads(raw))
    return records

def write2jsonline(filename: str, lines):
    """
    Serialise a sequence of records to a JSON Lines (.jsonl) file.

    Each record is written as compact JSON (no spaces after separators,
    non-ASCII characters kept as-is) followed by a single ``\\n``. A short
    summary with the record count is printed before writing.

    Args:
        filename (str): The path to the output .jsonl file (overwritten).
        lines (list): The JSON-serialisable records to write, one per line.
    """
    print(f'[{filename}] write size: {len(lines)}')
    compact_separators = (',', ':')
    # newline='\n' stops the platform from translating line endings.
    with open(filename, 'w', encoding='utf-8', newline='\n') as out:
        for record in lines:
            json.dump(
                record,
                out,
                ensure_ascii=False,
                indent=None,
                separators=compact_separators,
            )
            out.write('\n')

def read2json(filename):
    """
    Load the contents of a JSON file.

    Args:
        filename (str): The path to the .json file, read as UTF-8.

    Returns:
        dict or list: The value decoded from the file's JSON document.
    """
    with open(filename, encoding='utf-8') as handle:
        payload = json.load(handle)
    return payload

def write2json(filename, save_data):
    """
    Dump data to a JSON file, pretty-printed with a two-space indent.

    Non-ASCII characters are written as-is rather than escaped. A short
    summary with the size of ``save_data`` is printed before writing.

    Args:
        filename (str): The path to the output .json file (overwritten).
        save_data (dict or list): The data to serialise; must support ``len``.
    """
    size = len(save_data)
    print(f'filename: {filename}, write size: {size}')
    with open(filename, 'w', encoding='utf-8') as out:
        json.dump(save_data, out, ensure_ascii=False, indent=2)

def number_to_column(n):
    """
    Translate a 1-based column index into Excel's letter notation.

    The conversion is bijective base-26: 1 -> 'A', 26 -> 'Z', 27 -> 'AA'.
    Values of ``n`` that are zero or negative yield an empty string.

    Args:
        n (int): The column number (1-based).

    Returns:
        str: The corresponding Excel column label (e.g., A, B, ..., AA, AB, ...).
    """
    letters = []
    remaining = n
    while remaining > 0:
        # Shift to 0-based before dividing so that 26 maps to 'Z', not 'A@'.
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord('A') + offset))
    # Digits were produced least-significant first.
    return "".join(reversed(letters))

def coordinates_to_excel(x, y):
    """
    Build an Excel A1-style cell reference from a (row, column) pair.

    The column index is turned into letters via ``number_to_column`` and the
    row number is appended unchanged.

    Args:
        x (int): Excel row number (1-based).
        y (int): Excel column number (1-based).

    Returns:
        str: Excel-style cell (e.g., 'A1', 'B2', etc.).
    """
    return f"{number_to_column(y)}{x}"

def write2excel(filename: str, json_data, field_names: list[str]):
    """
    Write structured data into an Excel (.xlsx) file.

    The sheet layout is: row 1 holds the header (a serial-number column
    followed by ``field_names``); each following row holds one record, with a
    0-based serial number in column A and the requested fields after it.
    Nothing is written when ``json_data`` is empty.

    Args:
        filename (str): The output path for the Excel file.
        json_data (list[dict]): List of dictionaries containing the data.
        field_names (list[str]): List of field names (keys) representing columns to be included.

    Raises:
        KeyError: If a record lacks one of ``field_names``. The workbook is
            still closed in that case, so no open handle is left behind.
    """
    if not json_data:
        return
    print(f'filename: {filename}, write size: {len(json_data)}')

    # The context manager closes the workbook even if a cell write raises.
    with xlsxwriter.Workbook(filename) as workbook:
        worksheet = workbook.add_worksheet()

        # Header row; '序号' is the serial-number column caption.
        worksheet.write(coordinates_to_excel(1, 1), '序号')
        for col, name in enumerate(field_names, 2):
            worksheet.write(coordinates_to_excel(1, col), name)

        # Data rows start at Excel row 2; the serial number is 0-based.
        for row, record in enumerate(json_data, 2):
            # Written once per row, so it is present even with no fields.
            worksheet.write(coordinates_to_excel(row, 1), row - 2)
            for col, name in enumerate(field_names, 2):
                worksheet.write(coordinates_to_excel(row, col), record[name])


import numpy as np
import time
import os
import random

def setup_seed(seed):
    """
    Set random seed for reproducibility across various libraries.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), exports ``PYTHONHASHSEED``, and configures cuDNN to
    use deterministic algorithms with autotuning disabled.

    Args:
        seed (int): The seed value to set.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this
    # assignment only influences processes spawned after this call, not the
    # hash randomisation of the current process.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.backends.cudnn.deterministic = True
    # With benchmarking on, cuDNN may select different kernels from run to
    # run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False