Skip to content

Commit 4629d91

Browse files
committed
There is a bug with large zip files and path joining, so we need to investigate it.
1 parent e693243 commit 4629d91

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

self_hosting_machinery/finetune/scripts/finetune_train.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import sys
21
import click
32
import copy
43
import json
@@ -18,7 +17,8 @@
1817
import torch.distributed as dist
1918

2019
from refact_utils.scripts import env
21-
from refact_utils.scripts.env import safe_paths_join
20+
# TODO: there is a bug with large zip files and path joining; safe_paths_join is disabled (plain os.path.join is used instead) until it is investigated
21+
# from refact_utils.scripts.env import safe_paths_join
2222
from refact_utils.finetune.utils import finetune_train_defaults
2323
from self_hosting_machinery.finetune.configuration.finetune_config import base_config, ConfigBuilder
2424
from self_hosting_machinery.finetune.scripts.auxiliary.dataset import (
@@ -170,8 +170,8 @@ def gpu_filter_and_build_config(
170170
def _copy_source_files(jsonl_src, jsonl_dst, pname, run_id):
171171
for d in jsonlines.open(jsonl_src):
172172
try:
173-
src_path = safe_paths_join(env.PP_DIR_UNPACKED(pname), d["path"])
174-
dst_path = safe_paths_join(env.PERRUN_DIR_UNPACKED(run_id), d["path"])
173+
src_path = os.path.join(env.PP_DIR_UNPACKED(pname), d["path"])
174+
dst_path = os.path.join(env.PERRUN_DIR_UNPACKED(run_id), d["path"])
175175
except ValueError as e:
176176
raise ValueError(f'copy source files error: {e}')
177177
os.makedirs(os.path.dirname(dst_path), exist_ok=True)

0 commit comments

Comments
 (0)