Skip to content

Commit ac3f1ee

Browse files
awaelchli, tchaton, lantiga
authored
Patch release 2.2.5 (#19893)
Co-authored-by: thomas chaton <[email protected]> Co-authored-by: Luca Antiga <[email protected]>
1 parent 2a46b0c commit ac3f1ee

File tree

16 files changed

+125
-36
lines changed

16 files changed

+125
-36
lines changed

docs/source-app/conf.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -449,3 +449,6 @@ def find_source():
449449

450450
# ignore all links in any CHANGELOG file
451451
linkcheck_exclude_documents = [r"^(.*\/)*CHANGELOG.*$"]
452+
453+
# ignore the following relative links (false positive errors during linkcheck)
454+
linkcheck_ignore = ["https://openai.com/"]

docs/source-pytorch/conf.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -343,8 +343,6 @@ def _load_py_module(name: str, location: str) -> ModuleType:
343343
"graphcore": ("https://docs.graphcore.ai/en/latest/", None),
344344
"lightning_habana": ("https://lightning-ai.github.io/lightning-Habana/", None),
345345
"tensorboardX": ("https://tensorboardx.readthedocs.io/en/stable/", None),
346-
# needed for referencing App from lightning scope
347-
"lightning.app": ("https://lightning.ai/docs/app/stable/", None),
348346
# needed for referencing Fabric from lightning scope
349347
"lightning.fabric": ("https://lightning.ai/docs/fabric/stable/", None),
350348
# TODO: these are missing objects.inv
@@ -626,4 +624,5 @@ def package_list_from_file(file):
626624
"https://stackoverflow.com/questions/66640705/how-can-i-install-grpcio-on-an-apple-m1-silicon-laptop",
627625
"https://github.com/Lightning-AI/lightning/blob/master/examples/pytorch/ipu/mnist_sample.py",
628626
"https://ngc.nvidia.com/catalog/containers/nvidia:nemo", # in ecosystem/asr_nlp_tts.rst
627+
"https://openai.com/",
629628
]

requirements/app/app.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
lightning-cloud == 0.5.68 # Must be pinned to ensure compatibility
1+
lightning-cloud == 0.5.69 # Must be pinned to ensure compatibility
22
packaging
33
typing-extensions >=4.4.0, <4.10.0
44
deepdiff >=5.7.0, <6.6.0

src/lightning/app/core/app.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
from lightning.app.api.request_types import _APIRequest, _CommandRequest, _DeltaRequest
3131
from lightning.app.core.constants import (
3232
BATCH_DELTA_COUNT,
33+
CHECK_ERROR_QUEUE_INTERVAL,
3334
DEBUG_ENABLED,
3435
FLOW_DURATION_SAMPLES,
3536
FLOW_DURATION_THRESHOLD,
@@ -165,6 +166,7 @@ def __init__(
165166

166167
self._last_run_time: float = 0.0
167168
self._run_times: list = []
169+
self._last_check_error_queue: float = 0.0
168170

169171
# Path attributes can't get properly attached during the initialization, because the full name
170172
# is only available after all Flows and Works have been instantiated.
@@ -318,10 +320,12 @@ def batch_get_state_changed_from_queue(q: BaseQueue, timeout: Optional[float] =
318320
return []
319321

320322
def check_error_queue(self) -> None:
321-
exception: Exception = self.get_state_changed_from_queue(self.error_queue) # type: ignore[assignment,arg-type]
322-
if isinstance(exception, Exception):
323-
self.exception = exception
324-
self.stage = AppStage.FAILED
323+
if (time() - self._last_check_error_queue) > CHECK_ERROR_QUEUE_INTERVAL:
324+
exception: Exception = self.get_state_changed_from_queue(self.error_queue) # type: ignore[assignment,arg-type]
325+
if isinstance(exception, Exception):
326+
self.exception = exception
327+
self.stage = AppStage.FAILED
328+
self._last_check_error_queue = time()
325329

326330
@property
327331
def flows(self) -> List[Union[LightningWork, "LightningFlow"]]:

src/lightning/app/core/constants.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ def get_lightning_cloud_url() -> str:
7070
LIGHTNING_COMPONENT_PUBLIC_REGISTRY = "https://lightning.ai/v1/components"
7171
LIGHTNING_APPS_PUBLIC_REGISTRY = "https://lightning.ai/v1/apps"
7272
LIGHTNING_MODELS_PUBLIC_REGISTRY = "https://lightning.ai/v1/models"
73+
ENABLE_ORCHESTRATOR = bool(int(os.getenv("ENABLE_ORCHESTRATOR", "1")))
7374

7475
LIGHTNING_CLOUDSPACE_HOST = os.getenv("LIGHTNING_CLOUDSPACE_HOST")
7576
LIGHTNING_CLOUDSPACE_EXPOSED_PORT_COUNT = int(os.getenv("LIGHTNING_CLOUDSPACE_EXPOSED_PORT_COUNT", "0"))
@@ -99,6 +100,7 @@ def get_lightning_cloud_url() -> str:
99100
SYS_CUSTOMIZATIONS_SYNC_PATH = ".sys-customizations-sync"
100101

101102
BATCH_DELTA_COUNT = int(os.getenv("BATCH_DELTA_COUNT", "128"))
103+
CHECK_ERROR_QUEUE_INTERVAL = float(os.getenv("CHECK_ERROR_QUEUE_INTERVAL", "30"))
102104

103105

104106
def enable_multiple_works_in_default_container() -> bool:

src/lightning/app/runners/cloud.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
CloudspaceIdRunsBody,
3535
Externalv1LightningappInstance,
3636
Gridv1ImageSpec,
37-
IdGetBody1,
37+
IdGetBody,
3838
ProjectIdCloudspacesBody,
3939
V1BuildSpec,
4040
V1CloudSpace,
@@ -1027,7 +1027,7 @@ def _api_create_run_instance(
10271027
project_id=project_id,
10281028
cloudspace_id=cloudspace_id,
10291029
id=run_id,
1030-
body=IdGetBody1(
1030+
body=IdGetBody(
10311031
cluster_id=cluster_id,
10321032
name=run_name,
10331033
desired_state=desired_state,

src/lightning/app/runners/multiprocess.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -81,16 +81,17 @@ def dispatch(self, *args: Any, open_ui: bool = True, **kwargs: Any):
8181

8282
_set_flow_context()
8383

84-
storage_orchestrator = StorageOrchestrator(
85-
self.app,
86-
self.app.request_queues,
87-
self.app.response_queues,
88-
self.app.copy_request_queues,
89-
self.app.copy_response_queues,
90-
)
91-
self.threads.append(storage_orchestrator)
92-
storage_orchestrator.setDaemon(True)
93-
storage_orchestrator.start()
84+
if constants.ENABLE_ORCHESTRATOR:
85+
storage_orchestrator = StorageOrchestrator(
86+
self.app,
87+
self.app.request_queues,
88+
self.app.response_queues,
89+
self.app.copy_request_queues,
90+
self.app.copy_response_queues,
91+
)
92+
self.threads.append(storage_orchestrator)
93+
storage_orchestrator.setDaemon(True)
94+
storage_orchestrator.start()
9495

9596
if self.start_server:
9697
self.app.should_publish_changes_to_api = True

src/lightning/app/utilities/network.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -96,10 +96,14 @@ def create_retry_strategy():
9696
# are going to be alive for a very long time (~ 4 days) but retries every 120 seconds
9797
total=_CONNECTION_RETRY_TOTAL,
9898
backoff_factor=_CONNECTION_RETRY_BACKOFF_FACTOR,
99+
# Any 4xx and 5xx statuses except
100+
# 400 Bad Request
101+
# 401 Unauthorized
102+
# 403 Forbidden
103+
# 404 Not Found
99104
status_forcelist={
100-
408, # Request Timeout
101-
429, # Too Many Requests
102-
*range(500, 600), # Any 5xx Server Error status
105+
402,
106+
*range(405, 600),
103107
},
104108
allowed_methods={
105109
"POST", # Default methods are idempotent, add POST here

src/lightning/fabric/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
66

77

8+
## [2.2.5] - 2024-05-23
9+
10+
### Fixed
11+
12+
- Fixed a matrix shape mismatch issue when running a model loaded from a quantized checkpoint (bitsandbytes) ([#19886](https://github.com/Lightning-AI/lightning/pull/19886))
13+
14+
815
## [2.2.2] - 2024-04-11
916

1017
### Fixed

src/lightning/fabric/plugins/precision/bitsandbytes.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -234,9 +234,9 @@ def quantize_(self, weight: Optional[torch.Tensor] = None, device: Optional[torc
234234
"""Inplace quantize."""
235235
if weight is None:
236236
weight = self.weight.data
237-
if weight.data.type == torch.int8:
238-
# already quantized
239-
return
237+
if weight.data.dtype == torch.int8:
238+
# already quantized
239+
return
240240
assert isinstance(self.weight, bnb.nn.Int8Params)
241241
self.weight = self.quantize(self.weight, weight, device)
242242

@@ -318,9 +318,9 @@ def quantize_(self, weight: Optional[torch.Tensor] = None, device: Optional[torc
318318
"""Inplace quantize."""
319319
if weight is None:
320320
weight = self.weight.data
321-
if weight.data.type == torch.uint8:
322-
# already quantized
323-
return
321+
if weight.data.dtype == torch.uint8:
322+
# already quantized
323+
return
324324
assert isinstance(self.weight, bnb.nn.Params4bit)
325325
self.weight = self.quantize(self.weight, weight, device)
326326

0 commit comments

Comments
 (0)