Skip to content

Commit c51491c

Browse files
authored
Merge branch 'main' into multi_turn_chat
2 parents 692f84f + 4441948 commit c51491c

16 files changed

+389
-67
lines changed

tests/artifacts/testdata/__init__.py

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -19,37 +19,47 @@
1919

2020
### Constants used for data
2121
DATA_DIR = os.path.join(os.path.dirname(__file__))
22+
JSON_DATA_DIR = os.path.join(os.path.dirname(__file__), "json")
23+
JSONL_DATA_DIR = os.path.join(os.path.dirname(__file__), "jsonl")
24+
ARROW_DATA_DIR = os.path.join(os.path.dirname(__file__), "arrow")
2225
PARQUET_DATA_DIR = os.path.join(os.path.dirname(__file__), "parquet")
23-
TWITTER_COMPLAINTS_DATA_JSON = os.path.join(DATA_DIR, "twitter_complaints_small.json")
24-
TWITTER_COMPLAINTS_DATA_JSONL = os.path.join(DATA_DIR, "twitter_complaints_small.jsonl")
25-
TWITTER_COMPLAINTS_DATA_ARROW = os.path.join(DATA_DIR, "twitter_complaints_small.arrow")
26+
27+
TWITTER_COMPLAINTS_DATA_JSON = os.path.join(
28+
JSON_DATA_DIR, "twitter_complaints_small.json"
29+
)
30+
TWITTER_COMPLAINTS_DATA_JSONL = os.path.join(
31+
JSONL_DATA_DIR, "twitter_complaints_small.jsonl"
32+
)
33+
TWITTER_COMPLAINTS_DATA_ARROW = os.path.join(
34+
ARROW_DATA_DIR, "twitter_complaints_small.arrow"
35+
)
2636
TWITTER_COMPLAINTS_DATA_PARQUET = os.path.join(
2737
PARQUET_DATA_DIR, "twitter_complaints_small.parquet"
2838
)
2939
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON = os.path.join(
30-
DATA_DIR, "twitter_complaints_input_output.json"
40+
JSON_DATA_DIR, "twitter_complaints_input_output.json"
3141
)
3242
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL = os.path.join(
33-
DATA_DIR, "twitter_complaints_input_output.jsonl"
43+
JSONL_DATA_DIR, "twitter_complaints_input_output.jsonl"
3444
)
3545
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_ARROW = os.path.join(
36-
DATA_DIR, "twitter_complaints_input_output.arrow"
46+
ARROW_DATA_DIR, "twitter_complaints_input_output.arrow"
3747
)
3848
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_PARQUET = os.path.join(
3949
PARQUET_DATA_DIR, "twitter_complaints_input_output.parquet"
4050
)
4151
TWITTER_COMPLAINTS_TOKENIZED_JSON = os.path.join(
42-
DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json"
52+
JSON_DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json"
4353
)
4454
TWITTER_COMPLAINTS_TOKENIZED_JSONL = os.path.join(
45-
DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.jsonl"
55+
JSONL_DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.jsonl"
4656
)
4757
TWITTER_COMPLAINTS_TOKENIZED_ARROW = os.path.join(
48-
DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.arrow"
58+
ARROW_DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.arrow"
4959
)
5060
TWITTER_COMPLAINTS_TOKENIZED_PARQUET = os.path.join(
5161
PARQUET_DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.parquet"
5262
)
53-
EMPTY_DATA = os.path.join(DATA_DIR, "empty_data.json")
54-
MALFORMATTED_DATA = os.path.join(DATA_DIR, "malformatted_data.json")
63+
EMPTY_DATA = os.path.join(JSON_DATA_DIR, "empty_data.json")
64+
MALFORMATTED_DATA = os.path.join(JSON_DATA_DIR, "malformatted_data.json")
5565
MODEL_NAME = "Maykeye/TinyLLama-v0"

tests/artifacts/testdata/twitter_complaints_input_output.arrow renamed to tests/artifacts/testdata/arrow/twitter_complaints_input_output.arrow

File renamed without changes.

tests/artifacts/testdata/twitter_complaints_small.arrow renamed to tests/artifacts/testdata/arrow/twitter_complaints_small.arrow

File renamed without changes.

tests/artifacts/testdata/twitter_complaints_tokenized_with_maykeye_tinyllama_v0.arrow renamed to tests/artifacts/testdata/arrow/twitter_complaints_tokenized_with_maykeye_tinyllama_v0.arrow

File renamed without changes.
File renamed without changes.
File renamed without changes.

tests/artifacts/testdata/twitter_complaints_input_output.json renamed to tests/artifacts/testdata/json/twitter_complaints_input_output.json

File renamed without changes.

tests/artifacts/testdata/twitter_complaints_small.json renamed to tests/artifacts/testdata/json/twitter_complaints_small.json

File renamed without changes.

tests/artifacts/testdata/twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json renamed to tests/artifacts/testdata/json/twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json

File renamed without changes.

tests/artifacts/testdata/twitter_complaints_input_output.jsonl renamed to tests/artifacts/testdata/jsonl/twitter_complaints_input_output.jsonl

File renamed without changes.

0 commit comments

Comments
 (0)