Skip to content

Commit 0de52d1

Browse files
tchaton, pre-commit-ci[bot], and thomas
authored and committed
Add DNS optimize support (#19429)
* update * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update * update * update * update * update * update --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: thomas <[email protected]> (cherry picked from commit 4c2fc3b)
1 parent 452f434 commit 0de52d1

File tree

15 files changed

+111
-26
lines changed

15 files changed

+111
-26
lines changed

src/lightning/data/processing/data_processor.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,18 @@
2020
from tqdm.auto import tqdm as _tqdm
2121

2222
from lightning import seed_everything
23-
from lightning.data.processing.readers import BaseReader
24-
from lightning.data.streaming import Cache
25-
from lightning.data.streaming.cache import Dir
26-
from lightning.data.streaming.client import S3Client
27-
from lightning.data.streaming.constants import (
23+
from lightning.data.constants import (
2824
_BOTO3_AVAILABLE,
2925
_DEFAULT_FAST_DEV_RUN_ITEMS,
3026
_INDEX_FILENAME,
3127
_IS_IN_STUDIO,
3228
_LIGHTNING_CLOUD_LATEST,
3329
_TORCH_GREATER_EQUAL_2_1_0,
3430
)
31+
from lightning.data.processing.readers import BaseReader
32+
from lightning.data.streaming import Cache
33+
from lightning.data.streaming.cache import Dir
34+
from lightning.data.streaming.client import S3Client
3535
from lightning.data.streaming.resolver import _resolve_dir
3636
from lightning.data.utilities.broadcast import broadcast_object
3737
from lightning.data.utilities.packing import _pack_greedily

src/lightning/data/processing/dns.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from contextlib import contextmanager
2+
from subprocess import Popen
3+
from typing import Any
4+
5+
from lightning.data.constants import _IS_IN_STUDIO
6+
7+
8+
@contextmanager
def optimize_dns_context(enable: bool) -> Any:
    """Apply a DNS setting for the duration of a ``with`` block, then turn it off.

    On entry ``optimize_dns(enable)`` is called; on exit ``optimize_dns(False)``
    is called, whether the block finished normally or raised.

    Args:
        enable: Value forwarded to ``optimize_dns`` when the context is entered.

    Yields:
        Nothing; the context manager is used only for its enter/exit side effects.
    """
    optimize_dns(enable)
    try:
        yield
    finally:
        # ``finally`` instead of ``except Exception`` so the setting is also
        # reverted on KeyboardInterrupt / SystemExit / GeneratorExit, and so the
        # original exception propagates with its traceback untouched.
        optimize_dns(False)  # always disable the optimize DNS
17+
18+
def optimize_dns(enable: bool) -> None:
    """Switch the DNS configuration by running ``_optimize_dns`` under ``sudo`` when needed.

    Does nothing unless ``_IS_IN_STUDIO`` is truthy. Otherwise ``/etc/resolv.conf``
    is read and a privileged Python subprocess is started only when a change
    appears necessary:

    * ``enable`` is truthy and some line contains ``127.0.0.53``, or
    * ``enable`` is falsy and some line contains ``127.0.0.1``.

    The call blocks until the subprocess exits; its exit status is not checked.

    Args:
        enable: Whether the optimized DNS setting should be turned on or off.
    """
    if not _IS_IN_STUDIO:
        return

    with open("/etc/resolv.conf") as f:
        lines = f.readlines()

    if enable:
        needs_switch = any("127.0.0.53" in line for line in lines)
    else:
        needs_switch = any("127.0.0.1" in line for line in lines)
    if not needs_switch:
        return

    # Coerce to a real bool so the generated source always contains the literal
    # ``True`` or ``False``, whatever truthy/falsy object the caller passed.
    code = (
        "from lightning.data.processing.dns import _optimize_dns; "
        f"_optimize_dns({bool(enable)!r})"
    )
    # Pass an argv list instead of a shell string: no shell is spawned and no
    # quoting of the ``-c`` payload is required.
    Popen(["sudo", "/home/zeus/miniconda3/envs/cloudspace/bin/python", "-c", code]).wait()
30+
31+
def _optimize_dns(enable: bool) -> None:
    """Rewrite the ``nameserver 127...`` entries of ``/etc/resolv.conf`` in place.

    Every line containing the text ``nameserver 127`` is replaced by
    ``nameserver 127.0.0.1`` when ``enable`` is truthy, or by
    ``nameserver 127.0.0.53`` otherwise. All other lines are written back
    unchanged and in their original order.

    Args:
        enable: Selects which of the two nameserver lines is written.
    """
    # The substitute line depends only on ``enable``, so pick it once up front.
    replacement = 'nameserver 127.0.0.1\n' if enable else 'nameserver 127.0.0.53\n'

    with open("/etc/resolv.conf") as src:
        current = src.readlines()

    updated = [replacement if "nameserver 127" in entry else entry for entry in current]

    with open("/etc/resolv.conf", "w") as dst:
        dst.writelines(updated)

src/lightning/data/processing/functions.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@
2222

2323
import torch
2424

25+
from lightning.data.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
2526
from lightning.data.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
27+
from lightning.data.processing.dns import optimize_dns_context
2628
from lightning.data.processing.readers import BaseReader
27-
from lightning.data.streaming.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
2829
from lightning.data.streaming.resolver import (
2930
Dir,
3031
_assert_dir_has_index_file,
@@ -218,7 +219,8 @@ def map(
218219
weights=weights,
219220
reader=reader,
220221
)
221-
return data_processor.run(LambdaDataTransformRecipe(fn, inputs))
222+
with optimize_dns_context(True):
223+
return data_processor.run(LambdaDataTransformRecipe(fn, inputs))
222224
return _execute(
223225
f"data-prep-map-{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}",
224226
num_nodes,
@@ -303,15 +305,18 @@ def optimize(
303305
reorder_files=reorder_files,
304306
reader=reader,
305307
)
306-
return data_processor.run(
307-
LambdaDataChunkRecipe(
308-
fn,
309-
inputs,
310-
chunk_size=chunk_size,
311-
chunk_bytes=chunk_bytes,
312-
compression=compression,
308+
309+
with optimize_dns_context(True):
310+
data_processor.run(
311+
LambdaDataChunkRecipe(
312+
fn,
313+
inputs,
314+
chunk_size=chunk_size,
315+
chunk_bytes=chunk_bytes,
316+
compression=compression,
317+
)
313318
)
314-
)
319+
return None
315320
return _execute(
316321
f"data-prep-optimize-{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}",
317322
num_nodes,

src/lightning/data/streaming/cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import os
1616
from typing import Any, Dict, List, Optional, Tuple, Union
1717

18-
from lightning.data.streaming.constants import (
18+
from lightning.data.constants import (
1919
_INDEX_FILENAME,
2020
_LIGHTNING_CLOUD_LATEST,
2121
_TORCH_GREATER_EQUAL_2_1_0,

src/lightning/data/streaming/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from time import time
33
from typing import Any, Optional
44

5-
from lightning.data.streaming.constants import _BOTO3_AVAILABLE
5+
from lightning.data.constants import _BOTO3_AVAILABLE
66

77
if _BOTO3_AVAILABLE:
88
import boto3

src/lightning/data/streaming/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import os
1616
from typing import Any, Dict, List, Optional, Tuple
1717

18-
from lightning.data.streaming.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
18+
from lightning.data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
1919
from lightning.data.streaming.downloader import get_downloader_cls
2020
from lightning.data.streaming.item_loader import BaseItemLoader, PyTreeLoader, TokensLoader
2121
from lightning.data.streaming.sampler import ChunkedIndex

src/lightning/data/streaming/dataloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@
3333
)
3434
from torch.utils.data.sampler import BatchSampler, Sampler
3535

36+
from lightning.data.constants import _DEFAULT_CHUNK_BYTES, _TORCH_GREATER_EQUAL_2_1_0, _VIZ_TRACKER_AVAILABLE
3637
from lightning.data.streaming import Cache
3738
from lightning.data.streaming.combined import (
3839
__NUM_SAMPLES_YIELDED_KEY__,
3940
__SAMPLES_KEY__,
4041
CombinedStreamingDataset,
4142
)
42-
from lightning.data.streaming.constants import _DEFAULT_CHUNK_BYTES, _TORCH_GREATER_EQUAL_2_1_0, _VIZ_TRACKER_AVAILABLE
4343
from lightning.data.streaming.dataset import StreamingDataset
4444
from lightning.data.streaming.sampler import CacheBatchSampler
4545
from lightning.data.utilities.env import _DistributedEnv

src/lightning/data/streaming/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
import numpy as np
2020
from torch.utils.data import IterableDataset
2121

22-
from lightning.data.streaming import Cache
23-
from lightning.data.streaming.constants import (
22+
from lightning.data.constants import (
2423
_DEFAULT_CACHE_DIR,
2524
_INDEX_FILENAME,
2625
)
26+
from lightning.data.streaming import Cache
2727
from lightning.data.streaming.item_loader import BaseItemLoader
2828
from lightning.data.streaming.resolver import Dir, _resolve_dir
2929
from lightning.data.streaming.sampler import ChunkedIndex

src/lightning/data/streaming/downloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919

2020
from filelock import FileLock, Timeout
2121

22+
from lightning.data.constants import _INDEX_FILENAME
2223
from lightning.data.streaming.client import S3Client
23-
from lightning.data.streaming.constants import _INDEX_FILENAME
2424

2525

2626
class Downloader(ABC):

0 commit comments

Comments
 (0)