Skip to content

Commit 0635369

Browse files
feat: add Litracer docs in readme (#549)
* started working on readme * Update README.md * Update README.md Co-authored-by: Bhimraj Yadav <bhimrajyadav977@gmail.com> * feat: add debugger module with logging capabilities and update README for debugging instructions * Update README.md * Update README.md * update * update * Update README.md * Update README.md --------- Co-authored-by: Bhimraj Yadav <bhimrajyadav977@gmail.com>
1 parent efc5786 commit 0635369

File tree

11 files changed

+81
-12
lines changed

11 files changed

+81
-12
lines changed

README.md

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,8 +1144,77 @@ class CustomEncryption(Encryption):
11441144
This allows the data to remain secure while maintaining flexibility in the encryption method.
11451145
</details>
11461146

1147+
<details>
1148+
<summary> ✅ Debug & Profile LitData with logs & Litracer</summary>
1149+
1150+
&nbsp;
1151+
1152+
LitData comes with built-in logging and profiling capabilities to help you debug and profile your data streaming workloads.
1153+
1154+
<img width="1439" alt="431247797-0e955e71-2f9a-4aad-b7c1-a8218fed2e2e" src="https://github.com/user-attachments/assets/4e40676c-ba0b-49af-acac-975977173669" />
1155+
1156+
- e.g., with LitData Streaming
1157+
1158+
```python
1159+
import litdata as ld
1160+
from litdata.debugger import enable_tracer
1161+
1162+
# WARNING: Remove any existing `litdata_debug.log` trace file before re-tracing
1163+
enable_tracer()
1164+
1165+
if __name__ == "__main__":
1166+
dataset = ld.StreamingDataset("s3://my-bucket/my-data", shuffle=True)
1167+
dataloader = ld.StreamingDataLoader(dataset, batch_size=64)
1168+
1169+
for batch in dataloader:
1170+
print(batch) # Replace with your data processing logic
1171+
```
1172+
1173+
1. Generate Debug Log:
1174+
1175+
- Run your Python program; it will create a log file (`litdata_debug.log`) containing detailed debug information.
1176+
1177+
```bash
1178+
python main.py
1179+
```
1180+
1181+
2. Install [Litracer](https://github.com/deependujha/litracer/):
1182+
1183+
- Option 1: Using Go (recommended)
1184+
- Install Go on your system.
1185+
- Run the following command to install Litracer:
1186+
1187+
```bash
1188+
go install github.com/deependujha/litracer@latest
1189+
```
1190+
1191+
- Option 2: Download Binary
1192+
- Visit the [Litracer GitHub Releases](https://github.com/deependujha/litracer/releases) page.
1193+
- Download the appropriate binary for your operating system and follow the installation instructions.
1194+
1195+
3. Convert Debug Log to trace JSON:
1196+
1197+
- Use `litracer` to convert the generated log file into a trace JSON file. The `-w 100` flag in the command below runs the conversion with 100 workers:
1198+
1199+
```bash
1200+
litracer litdata_debug.log -o litdata_trace.json -w 100
1201+
```
1202+
1203+
4. Visualize the trace:
1204+
1205+
- Use either `chrome://tracing` in the Chrome browser or `ui.perfetto.dev` to view the `litdata_trace.json` file for in-depth performance insights. You can also use `SQL queries` to analyze the logs.
1206+
- `Perfetto` is recommended over `chrome://tracing` for visualization and analysis.
1207+
1208+
- Key Points:
1209+
1210+
- For very large trace JSON files (`> 2GB`), refer to the [Perfetto documentation](https://perfetto.dev/docs/visualization/large-traces) on using native accelerators.
1211+
- If you are connecting Perfetto to the RPC server, use Chrome rather than Brave: Perfetto in Brave has been observed not to autodetect the RPC server.
1212+
1213+
</details>
1214+
11471215
&nbsp;
11481216

1217+
11491218
## Features for transforming datasets
11501219

11511220
<details>
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from functools import lru_cache
1818
from typing import Tuple
1919

20-
from litdata.constants import _DEBUG, _PRINT_DEBUG_LOGS
20+
from litdata.constants import _PRINT_DEBUG_LOGS
2121
from litdata.utilities.env import _DistributedEnv, _WorkerEnv
2222

2323
# Create the root logger for the library
@@ -41,7 +41,7 @@ def __init__(self, name: str):
4141
@staticmethod
4242
def get_log_file_and_level() -> Tuple[str, int]:
4343
log_file = os.getenv("LITDATA_LOG_FILE", "litdata_debug.log")
44-
log_lvl = os.getenv("LITDATA_LOG_LEVEL", "INFO" if not _DEBUG else "DEBUG")
44+
log_lvl = os.getenv("LITDATA_LOG_LEVEL", "DEBUG")
4545

4646
log_lvl = get_logger_level(log_lvl)
4747

@@ -76,7 +76,7 @@ def setup_logger(self) -> None:
7676
self.logger.addHandler(file_handler)
7777

7878

79-
def configure_logger() -> None:
79+
def enable_tracer() -> None:
8080
os.environ["LITDATA_LOG_FILE"] = "litdata_debug.log"
8181
LitDataLogger("litdata")
8282

src/litdata/streaming/combined.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from torch.utils.data import IterableDataset
2020

21-
from litdata.loggers import ChromeTraceColors, _get_log_msg
21+
from litdata.debugger import ChromeTraceColors, _get_log_msg
2222
from litdata.streaming.dataset import StreamingDataset
2323
from litdata.utilities.env import _WorkerEnv
2424

src/litdata/streaming/compression.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from typing import Dict, TypeVar
1717

1818
from litdata.constants import _ZSTD_AVAILABLE
19-
from litdata.loggers import ChromeTraceColors, _get_log_msg
19+
from litdata.debugger import ChromeTraceColors, _get_log_msg
2020

2121
TCompressor = TypeVar("TCompressor", bound="Compressor")
2222

src/litdata/streaming/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from typing import Any, Dict, List, Optional, Tuple
1818

1919
from litdata.constants import _INDEX_FILENAME
20-
from litdata.loggers import ChromeTraceColors, _get_log_msg
20+
from litdata.debugger import ChromeTraceColors, _get_log_msg
2121
from litdata.streaming.compression import _COMPRESSORS, Compressor
2222
from litdata.streaming.downloader import get_downloader
2323
from litdata.streaming.item_loader import BaseItemLoader, Interval, PyTreeLoader, TokensLoader

src/litdata/streaming/dataloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from torch.utils.data.sampler import BatchSampler, Sampler
3535

3636
from litdata.constants import _DEFAULT_CHUNK_BYTES, _VIZ_TRACKER_AVAILABLE
37-
from litdata.loggers import _get_log_msg
37+
from litdata.debugger import _get_log_msg
3838
from litdata.streaming import Cache
3939
from litdata.streaming.combined import (
4040
__NUM_SAMPLES_YIELDED_KEY__,

src/litdata/streaming/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222
from litdata import __version__
2323
from litdata.constants import _INDEX_FILENAME
24+
from litdata.debugger import _get_log_msg
2425
from litdata.helpers import _check_version_and_prompt_upgrade
25-
from litdata.loggers import _get_log_msg
2626
from litdata.streaming import Cache
2727
from litdata.streaming.item_loader import BaseItemLoader, ParquetLoader
2828
from litdata.streaming.resolver import Dir, _resolve_dir

src/litdata/streaming/downloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
_HF_HUB_AVAILABLE,
3232
_INDEX_FILENAME,
3333
)
34-
from litdata.loggers import _get_log_msg
34+
from litdata.debugger import _get_log_msg
3535
from litdata.streaming.client import S3Client
3636

3737
logger = logging.getLogger("litdata.streaming.downloader")

src/litdata/streaming/item_loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
_PYARROW_AVAILABLE,
3333
_TORCH_DTYPES_MAPPING,
3434
)
35-
from litdata.loggers import ChromeTraceColors, _get_log_msg
35+
from litdata.debugger import ChromeTraceColors, _get_log_msg
3636
from litdata.streaming.serializers import Serializer
3737
from litdata.utilities._pytree import PyTree, tree_unflatten
3838
from litdata.utilities.encryption import Encryption, EncryptionLevel

src/litdata/streaming/reader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from filelock import FileLock, Timeout
2424

2525
from litdata.constants import _DEBUG
26-
from litdata.loggers import _get_log_msg
26+
from litdata.debugger import _get_log_msg
2727
from litdata.streaming.config import ChunksConfig, Interval
2828
from litdata.streaming.item_loader import BaseItemLoader, ParquetLoader, PyTreeLoader, TokensLoader
2929
from litdata.streaming.sampler import ChunkedIndex

0 commit comments

Comments
 (0)