diff --git a/img2dataset/reader.py b/img2dataset/reader.py index b502f94..bc8103c 100644 --- a/img2dataset/reader.py +++ b/img2dataset/reader.py @@ -94,7 +94,7 @@ def _save_to_arrow(self, input_file, start_shard_id): compression = "gzip" with self.fs.open(input_file, encoding="utf-8", mode="rb", compression=compression) as file: if self.input_format in ["txt", "txt.gz"]: - df = csv_pa.read_csv(file, read_options=csv_pa.ReadOptions(column_names=["url"])) + df = pa.Table.from_pandas(pd.DataFrame({"url" : file.read().decode().splitlines()})) elif self.input_format in ["json", "json.gz"]: df = pa.Table.from_pandas(pd.read_json(file)) elif self.input_format in ["csv", "csv.gz"]: