Skip to content

Commit a5b78e0

Browse files
committed
feat: add example
1 parent adc1be6 commit a5b78e0

File tree

3 files changed

+51
-9
lines changed

3 files changed

+51
-9
lines changed

dingo/data/converter/base.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -274,9 +274,7 @@ def _convert(raw: Union[str, Dict]):
274274
j = raw
275275
if isinstance(raw, str):
276276
j = json.loads(raw)
277-
# 将 Excel 行数据作为 JSON 字符串放入 content 属性
278-
# 这样可以与其他数据格式保持一致的数据结构
279-
data_dict = {"content": json.dumps(j, ensure_ascii=False)}
277+
data_dict = j
280278
return Data(**data_dict)
281279

282280
return _convert

examples/dataset/excel.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import os
2+
3+
from dingo.config import InputArgs
4+
from dingo.exec import Executor
5+
6+
if __name__ == '__main__':
    # Example: run rule-based evaluation over a local Excel workbook.
    #
    # The sample workbook path is resolved relative to this script instead of
    # the current working directory, so the example can be launched from any
    # directory (e.g. the repository root) and still find the test data.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    excel_path = os.path.normpath(
        os.path.join(script_dir, "../../test/data/test_local_excel.xlsx")
    )

    input_data = {
        "input_path": excel_path,
        "dataset": {
            "source": "local",
            "format": "excel",
            "excel_config": {
                # Read the first worksheet and treat its first row as headers.
                "sheet_name": 0,
                "has_header": True,
            },
        },
        "executor": {
            "result_save": {
                "bad": True,
                "good": True,
                "raw": True,
            },
        },
        "evaluator": [
            {
                # Map evaluator field names to column names in the sheet.
                "fields": {"id": "id", "content": "content"},
                "evals": [
                    {"name": "RuleColonEnd"},
                    {"name": "RuleSpecialCharacter"},
                ],
            },
        ],
    }

    input_args = InputArgs(**input_data)
    executor = Executor.exec_map["local"](input_args)
    result = executor.execute()
    print(result)

test/scripts/dataset/test_excel_dataset.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,13 +152,14 @@ def test_xlsx_with_header():
152152

153153
# 验证数据格式
154154
if idx == 0:
155-
# 第一行数据应该有 "姓名", "年龄", "城市", "分数" 这些键
156-
assert hasattr(data, 'content'), "数据缺少 content 属性"
157-
data_dict = json.loads(data.content)
155+
# 第一行数据应该有 "姓名", "年龄", "城市", "分数" 这些字段
156+
data_dict = data.to_dict()
158157
assert "姓名" in data_dict, "数据缺少 '姓名' 字段"
159158
assert "年龄" in data_dict, "数据缺少 '年龄' 字段"
160159
assert "城市" in data_dict, "数据缺少 '城市' 字段"
161160
assert "分数" in data_dict, "数据缺少 '分数' 字段"
161+
# 也可以直接通过属性访问
162+
assert hasattr(data, '姓名'), "数据对象缺少 '姓名' 属性"
162163
print("✓ 数据格式验证通过")
163164

164165
assert count == 4, f"期望读取 4 行数据,实际读取了 {count} 行"
@@ -228,11 +229,13 @@ def test_xlsx_without_header():
228229

229230
# 验证数据格式(使用数字作为列名)
230231
if idx == 0:
231-
data_dict = json.loads(data.content)
232+
data_dict = data.to_dict()
232233
assert "0" in data_dict, "数据缺少 '0' 字段"
233234
assert "1" in data_dict, "数据缺少 '1' 字段"
234235
assert "2" in data_dict, "数据缺少 '2' 字段"
235236
assert "3" in data_dict, "数据缺少 '3' 字段"
237+
# 也可以直接通过属性访问(字符串形式的数字)
238+
assert hasattr(data, '0'), "数据对象缺少 '0' 属性"
236239
print("✓ 数据格式验证通过(使用列序号作为键)")
237240

238241
assert count == 4, f"期望读取 4 行数据,实际读取了 {count} 行"
@@ -302,9 +305,11 @@ def test_xlsx_sheet_by_name():
302305

303306
# 验证数据格式
304307
if idx == 0:
305-
data_dict = json.loads(data.content)
308+
data_dict = data.to_dict()
306309
assert "ID" in data_dict, "数据缺少 'ID' 字段"
307310
assert "名称" in data_dict, "数据缺少 '名称' 字段"
311+
# 也可以直接通过属性访问
312+
assert hasattr(data, 'ID'), "数据对象缺少 'ID' 属性"
308313
print("✓ 数据格式验证通过")
309314

310315
assert count == 2, f"期望读取 2 行数据,实际读取了 {count} 行"
@@ -377,9 +382,11 @@ def test_xls_with_header():
377382

378383
# 验证数据格式
379384
if idx == 0:
380-
data_dict = json.loads(data.content)
385+
data_dict = data.to_dict()
381386
assert "姓名" in data_dict, "数据缺少 '姓名' 字段"
382387
assert "年龄" in data_dict, "数据缺少 '年龄' 字段"
388+
# 也可以直接通过属性访问
389+
assert hasattr(data, '姓名'), "数据对象缺少 '姓名' 属性"
383390
print("✓ 数据格式验证通过")
384391

385392
assert count == 4, f"期望读取 4 行数据,实际读取了 {count} 行"

0 commit comments

Comments
 (0)