Skip to content

Commit aea8a5b

Browse files
andrewkho authored and facebook-github-bot committed
add compat module to point fbcode users to torchdata_deprecated
Summary: We're deleting datapipes in torchdata. TorchText has a bunch of datapipes defined, and we don't want to break fbcode tests. Internally we are moving users to `torchdata_deprecated` but we need to jump through some hoops. This diff creates a "compat" module which redirects to torchdata.datapipes.iter by default, but in FBCode we patch it to point to torchdata_deprecated instead. Reviewed By: ivy-zhou Differential Revision: D64208916 fbshipit-source-id: 6b625d87f3c42b28207bd84de3417cc3bc74caee
1 parent 27e2ae9 commit aea8a5b

36 files changed

+134
-171
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Sphinx
1919
pytest
2020
expecttest
2121
parameterized
22-
torchdata>0.5
22+
0.5<torchdata<=0.9.0
2323

2424
# Lets pytest find our code by automatically modifying PYTHONPATH
2525
pytest-pythonpath

torchtext/_download_hooks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# This is to allow monkey-patching in fbcode
66
from torch.hub import load_state_dict_from_url # noqa
7-
from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401
7+
from torchtext.compat.datapipes.iter import HttpReader, GDriveReader # noqa F401
88
from tqdm import tqdm
99

1010

torchtext/compat/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from torchtext._internal.module_utils import is_module_available
2+
3+
4+
def check_for_torchdata():
5+
if not is_module_available("torchdata"):
6+
raise ModuleNotFoundError(
7+
"Package `torchdata` not found. Please install torchdata <= 0.9.0: https://github.com/pytorch/data"
8+
)
9+
10+
11+
__all__ = [
12+
"check_for_torchdata",
13+
"dataloader2",
14+
"datapipes",
15+
]

torchtext/compat/datapipes/__init__.py

Whitespace-only changes.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from torchdata.datapipes.iter import (
2+
FileOpener,
3+
GDriveReader,
4+
HttpReader,
5+
IterableWrapper,
6+
OnlineReader,
7+
)
8+
9+
10+
__all__ = [
11+
"FileOpener",
12+
"GDriveReader",
13+
"HttpReader",
14+
"IterableWrapper",
15+
"OnlineReader",
16+
]

torchtext/datasets/ag_news.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
65
from torchtext._download_hooks import HttpReader
7-
from torchtext._internal.module_utils import is_module_available
6+
from torchtext.compat import check_for_torchdata
7+
8+
from torchtext.compat.datapipes.iter import FileOpener, IterableWrapper
89
from torchtext.data.datasets_utils import (
910
_wrap_split_argument,
1011
_create_dataset_directory,
@@ -61,10 +62,7 @@ def AG_NEWS(root: str, split: Union[Tuple[str], str]):
6162
:returns: DataPipe that yields tuple of label (1 to 4) and text
6263
:rtype: (int, str)
6364
"""
64-
if not is_module_available("torchdata"):
65-
raise ModuleNotFoundError(
66-
"Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
67-
)
65+
check_for_torchdata()
6866

6967
url_dp = IterableWrapper([URL[split]])
7068
cache_dp = url_dp.on_disk_cache(

torchtext/datasets/amazonreviewfull.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
65
from torchtext._download_hooks import GDriveReader
76
from torchtext._internal.module_utils import is_module_available
7+
from torchtext.compat import check_for_torchdata
8+
9+
from torchtext.compat.datapipes.iter import FileOpener, IterableWrapper
810
from torchtext.data.datasets_utils import (
911
_wrap_split_argument,
1012
_create_dataset_directory,
@@ -75,10 +77,7 @@ def AmazonReviewFull(root: str, split: Union[Tuple[str], str]):
7577
:returns: DataPipe that yields tuple of label (1 to 5) and text containing the review title and text
7678
:rtype: (int, str)
7779
"""
78-
if not is_module_available("torchdata"):
79-
raise ModuleNotFoundError(
80-
"Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
81-
)
80+
check_for_torchdata()
8281

8382
url_dp = IterableWrapper([URL])
8483
cache_compressed_dp = url_dp.on_disk_cache(

torchtext/datasets/amazonreviewpolarity.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5+
from torchtext.compat.datapipes.iter import FileOpener, IterableWrapper
66
from torchtext._download_hooks import GDriveReader
77
from torchtext._internal.module_utils import is_module_available
8+
from torchtext.compat import check_for_torchdata
89
from torchtext.data.datasets_utils import (
910
_wrap_split_argument,
1011
_create_dataset_directory,
@@ -72,10 +73,7 @@ def AmazonReviewPolarity(root: str, split: Union[Tuple[str], str]):
7273
:rtype: (int, str)
7374
"""
7475
# TODO Remove this after removing conditional dependency
75-
if not is_module_available("torchdata"):
76-
raise ModuleNotFoundError(
77-
"Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
78-
)
76+
check_for_torchdata()
7977

8078
url_dp = IterableWrapper([URL])
8179
cache_compressed_dp = url_dp.on_disk_cache(

torchtext/datasets/cc100.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os.path
22
from functools import partial
33

4-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
4+
from torchtext.compat.datapipes.iter import FileOpener, IterableWrapper
55
from torchtext._download_hooks import HttpReader
66
from torchtext.data.datasets_utils import (
77
_create_dataset_directory,

torchtext/datasets/cnndm.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
from functools import partial
44
from typing import Union, Set, Tuple
55

6-
from torchdata.datapipes.iter import (
6+
from torchtext.compat import check_for_torchdata
7+
8+
from torchtext.compat.datapipes.iter import (
79
FileOpener,
10+
GDriveReader,
811
IterableWrapper,
912
OnlineReader,
10-
GDriveReader,
1113
)
12-
from torchtext._internal.module_utils import is_module_available
1314
from torchtext.data.datasets_utils import (
1415
_wrap_split_argument,
1516
_create_dataset_directory,
@@ -137,10 +138,7 @@ def CNNDM(root: str, split: Union[Tuple[str], str]):
137138
:returns: DataPipe that yields a tuple of texts containing an article and its abstract (i.e. (article, abstract))
138139
:rtype: (str, str)
139140
"""
140-
if not is_module_available("torchdata"):
141-
raise ModuleNotFoundError(
142-
"Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
143-
)
141+
check_for_torchdata()
144142

145143
cnn_dp = _load_stories(root, "cnn", split)
146144
dailymail_dp = _load_stories(root, "dailymail", split)

0 commit comments

Comments
 (0)