Skip to content

Commit 31c34d0

Browse files
committed
Remove hard-coded tokenizer path
1 parent a29c848 commit 31c34d0

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

fastvideo/data_preprocess/preprocess.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
logger = init_logger(__name__)
1717

1818
def main(args):
19+
args.model_path = maybe_download_model(args.model_path)
1920
# Assume using torchrun
2021
local_rank = int(os.getenv("RANK", 0))
2122
rank = int(os.environ.get("RANK", 0))

fastvideo/v1/dataset/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import os
2+
13
from torchvision import transforms
24
from torchvision.transforms import Lambda
35
from transformers import AutoTokenizer
@@ -25,8 +27,8 @@ def getdataset(args, start_idx=0) -> T2V_dataset:
2527
*resize_topcrop,
2628
norm_fun,
2729
])
28-
# tokenizer = AutoTokenizer.from_pretrained("/storage/ongoing/new/Open-Sora-Plan/cache_dir/mt5-xxl", cache_dir=args.cache_dir)
29-
tokenizer = AutoTokenizer.from_pretrained(args.text_encoder_name,
30+
tokenizer_path = os.path.join(args.model_path, "tokenizer")
31+
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path,
3032
cache_dir=args.cache_dir)
3133
if args.dataset == "t2v":
3234
return T2V_dataset(args,

0 commit comments

Comments
 (0)