|
19 | 19 |
|
20 | 20 | ### Constants used for data |
21 | 21 | DATA_DIR = os.path.join(os.path.dirname(__file__)) |
| 22 | +JSON_DATA_DIR = os.path.join(os.path.dirname(__file__), "json") |
| 23 | +JSONL_DATA_DIR = os.path.join(os.path.dirname(__file__), "jsonl") |
| 24 | +ARROW_DATA_DIR = os.path.join(os.path.dirname(__file__), "arrow") |
22 | 25 | PARQUET_DATA_DIR = os.path.join(os.path.dirname(__file__), "parquet") |
23 | | -TWITTER_COMPLAINTS_DATA_JSON = os.path.join(DATA_DIR, "twitter_complaints_small.json") |
24 | | -TWITTER_COMPLAINTS_DATA_JSONL = os.path.join(DATA_DIR, "twitter_complaints_small.jsonl") |
25 | | -TWITTER_COMPLAINTS_DATA_ARROW = os.path.join(DATA_DIR, "twitter_complaints_small.arrow") |
| 26 | + |
| 27 | +TWITTER_COMPLAINTS_DATA_JSON = os.path.join( |
| 28 | + JSON_DATA_DIR, "twitter_complaints_small.json" |
| 29 | +) |
| 30 | +TWITTER_COMPLAINTS_DATA_JSONL = os.path.join( |
| 31 | + JSONL_DATA_DIR, "twitter_complaints_small.jsonl" |
| 32 | +) |
| 33 | +TWITTER_COMPLAINTS_DATA_ARROW = os.path.join( |
| 34 | + ARROW_DATA_DIR, "twitter_complaints_small.arrow" |
| 35 | +) |
26 | 36 | TWITTER_COMPLAINTS_DATA_PARQUET = os.path.join( |
27 | 37 | PARQUET_DATA_DIR, "twitter_complaints_small.parquet" |
28 | 38 | ) |
29 | 39 | TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON = os.path.join( |
30 | | - DATA_DIR, "twitter_complaints_input_output.json" |
| 40 | + JSON_DATA_DIR, "twitter_complaints_input_output.json" |
31 | 41 | ) |
32 | 42 | TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL = os.path.join( |
33 | | - DATA_DIR, "twitter_complaints_input_output.jsonl" |
| 43 | + JSONL_DATA_DIR, "twitter_complaints_input_output.jsonl" |
34 | 44 | ) |
35 | 45 | TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_ARROW = os.path.join( |
36 | | - DATA_DIR, "twitter_complaints_input_output.arrow" |
| 46 | + ARROW_DATA_DIR, "twitter_complaints_input_output.arrow" |
37 | 47 | ) |
38 | 48 | TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_PARQUET = os.path.join( |
39 | 49 | PARQUET_DATA_DIR, "twitter_complaints_input_output.parquet" |
40 | 50 | ) |
41 | 51 | TWITTER_COMPLAINTS_TOKENIZED_JSON = os.path.join( |
42 | | - DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json" |
| 52 | + JSON_DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json" |
43 | 53 | ) |
44 | 54 | TWITTER_COMPLAINTS_TOKENIZED_JSONL = os.path.join( |
45 | | - DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.jsonl" |
| 55 | + JSONL_DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.jsonl" |
46 | 56 | ) |
47 | 57 | TWITTER_COMPLAINTS_TOKENIZED_ARROW = os.path.join( |
48 | | - DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.arrow" |
| 58 | + ARROW_DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.arrow" |
49 | 59 | ) |
50 | 60 | TWITTER_COMPLAINTS_TOKENIZED_PARQUET = os.path.join( |
51 | 61 | PARQUET_DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.parquet" |
52 | 62 | ) |
53 | | -EMPTY_DATA = os.path.join(DATA_DIR, "empty_data.json") |
54 | | -MALFORMATTED_DATA = os.path.join(DATA_DIR, "malformatted_data.json") |
| 63 | +EMPTY_DATA = os.path.join(JSON_DATA_DIR, "empty_data.json") |
| 64 | +MALFORMATTED_DATA = os.path.join(JSON_DATA_DIR, "malformatted_data.json") |
55 | 65 | MODEL_NAME = "Maykeye/TinyLLama-v0" |
0 commit comments