Skip to content

Commit d73f89c

Browse files
committed
+ add trust_remote_code=True to all load_dataset calls
1 parent ec7f7c6 commit d73f89c

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

trinity/buffer/reader/file_reader.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def __init__(self, meta: StorageConfig, config: BufferConfig):
6666
self.response_key = meta.format.response_key
6767
self.read_batch_size = config.read_batch_size
6868
self.dataset = _HFBatchReader(
69-
load_dataset(meta.path, name=subset_name, split=self.split)
69+
load_dataset(meta.path, name=subset_name, split=self.split, trust_remote_code=True)
7070
) # TODO: support resume
7171
self.data_iter = self.dataset.iter(self.read_batch_size, drop_last_batch=True)
7272
self.tokenizer = transformers.AutoTokenizer.from_pretrained(config.tokenizer_path)
@@ -143,7 +143,7 @@ def __init__(self, meta: StorageConfig, config: BufferConfig):
143143
self.rejected_key = meta.format.rejected_key
144144
self.read_batch_size = config.read_batch_size
145145
self.dataset = _HFBatchReader(
146-
load_dataset(meta.path, name=subset_name, split=self.split)
146+
load_dataset(meta.path, name=subset_name, split=self.split, trust_remote_code=True)
147147
) # TODO: support resume
148148
self.data_iter = self.dataset.iter(self.read_batch_size, drop_last_batch=True)
149149
self.tokenizer = transformers.AutoTokenizer.from_pretrained(config.tokenizer_path)
@@ -215,7 +215,7 @@ def __init__(self, meta: StorageConfig, config: BufferConfig):
215215
self.epoch = 0
216216
datasets.disable_caching()
217217
self.dataset = _HFBatchReader(
218-
load_dataset(meta.path, name=subset_name, split=self.split),
218+
load_dataset(meta.path, name=subset_name, split=self.split, trust_remote_code=True),
219219
max_epoch=self.meta.total_epochs if meta.task_type == TaskType.EXPLORE else 1,
220220
offset=self.meta.index,
221221
)
@@ -266,7 +266,7 @@ def read(
266266
class RawDataReader(BufferReader):
267267
def __init__(self, meta: StorageConfig, config: Optional[BufferConfig]):
268268
self.returned = False
269-
self.dataset = load_dataset(meta.path, name=meta.subset_name, split=meta.split)
269+
self.dataset = load_dataset(meta.path, name=meta.subset_name, split=meta.split, trust_remote_code=True)
270270

271271
def __len__(self):
272272
return len(self.dataset)

0 commit comments

Comments
 (0)