-
Notifications
You must be signed in to change notification settings - Fork 0
feat(compilation): add VLLM_COMPILE_DEPYF env var to control depyf de… #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -100,23 +100,36 @@ | |
| "transformed_code.py") | ||
| if not os.path.exists(decompiled_file): | ||
| try: | ||
| # usually the decompilation will succeed for most models, | ||
| # as we guarantee a full-graph compilation in Dynamo. | ||
| # but there's no 100% guarantee, since decompilation is | ||
| # not a reversible process. | ||
| import depyf | ||
| src = depyf.decompile(new_code) | ||
|
|
||
| with open(decompiled_file, "w") as f: | ||
| f.write(src) | ||
|
|
||
| logger.debug("Dynamo transformed code saved to %s", | ||
| decompiled_file) | ||
| # Check if we should perform actual decompilation or write placeholder | ||
| if envs.VLLM_COMPILE_DEPYF: | ||
| # Perform actual decompilation when VLLM_COMPILE_DEPYF=1 | ||
| # usually the decompilation will succeed for most models, | ||
| # as we guarantee a full-graph compilation in Dynamo. | ||
| # but there's no 100% guarantee, since decompilation is | ||
| # not a reversible process. | ||
| import depyf | ||
| src = depyf.decompile(new_code) | ||
|
|
||
| with open(decompiled_file, "w") as f: | ||
| f.write(src) | ||
|
|
||
| logger.debug("Dynamo transformed code saved to %s", | ||
| decompiled_file) | ||
| else: | ||
| # Write placeholder file with comment when VLLM_COMPILE_DEPYF=0 (default) | ||
| placeholder_content = "# Please set VLLM_COMPILE_DEPYF=1 to populate this file\n" | ||
| with open(decompiled_file, "w") as f: | ||
| f.write(placeholder_content) | ||
| logger.debug("Placeholder Dynamo transformed code saved to %s. " | ||
| "Set VLLM_COMPILE_DEPYF=1 to perform actual decompilation.", | ||
| decompiled_file) | ||
| except Exception: | ||
| pass | ||
|
|
||
| if self.vllm_config.compilation_config.use_cudagraph and \ | ||
| "update" in new_code.co_names: | ||
| # For cudagraph error checking, we always perform decompilation regardless of VLLM_COMPILE_DEPYF | ||
| # because this is a critical error checking mechanism | ||
| import depyf | ||
| src = depyf.decompile(new_code) | ||
| msg = "Assigning / modifying buffers of nn.Module during forward pass is not allowed when using cudagraph inside the compiler because it will cause silent errors. Please use eager mode or fix the code. The following code contains clues about which buffer is being modified (please search for the usage of the function `update`):\n" + src # noqa | ||
|
Comment on lines
+103
to
135
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's a potential for if not os.path.exists(decompiled_file):
try:
src = None
def _decompile_once():
nonlocal src
if src is None:
import depyf
src = depyf.decompile(new_code)
return src
if envs.VLLM_COMPILE_DEPYF:
decompiled_src = _decompile_once()
with open(decompiled_file, "w") as f:
f.write(decompiled_src)
logger.debug("Dynamo transformed code saved to %s",
decompiled_file)
else:
placeholder_content = "# Please set VLLM_COMPILE_DEPYF=1 to populate this file\n"
with open(decompiled_file, "w") as f:
f.write(placeholder_content)
logger.debug("Placeholder Dynamo transformed code saved to %s. "
"Set VLLM_COMPILE_DEPYF=1 to perform actual decompilation.",
decompiled_file)
except Exception:
pass
if self.vllm_config.compilation_config.use_cudagraph and \
"update" in new_code.co_names:
decompiled_src = _decompile_once()
msg = "Assigning / modifying buffers of nn.Module during forward pass is not allowed when using cudagraph inside the compiler because it will cause silent errors. Please use eager mode or fix the code. The following code contains clues about which buffer is being modified (please search for the usage of the function `update`):\n" + decompiled_src # noqa
This would require declaring
if not os.path.exists(decompiled_file):
try:
src = None
def _decompile_once():
nonlocal src
if src is None:
import depyf
src = depyf.decompile(new_code)
return src
if envs.VLLM_COMPILE_DEPYF:
decompiled_src = _decompile_once()
with open(decompiled_file, "w") as f:
f.write(decompiled_src)
logger.debug("Dynamo transformed code saved to %s",
decompiled_file)
else:
placeholder_content = "# Please set VLLM_COMPILE_DEPYF=1 to populate this file\n"
with open(decompiled_file, "w") as f:
f.write(placeholder_content)
logger.debug("Placeholder Dynamo transformed code saved to %s. "
"Set VLLM_COMPILE_DEPYF=1 to perform actual decompilation.",
decompiled_file)
except Exception:
pass
if self.vllm_config.compilation_config.use_cudagraph and \
"update" in new_code.co_names:
decompiled_src = _decompile_once()
msg = "Assigning / modifying buffers of nn.Module during forward pass is not allowed when using cudagraph inside the compiler because it will cause silent errors. Please use eager mode or fix the code. The following code contains clues about which buffer is being modified (please search for the usage of the function `update`):\n" + decompiled_src # noqa |
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This `depyf.decompile` call is also present earlier in this function (around line 111) for debugging purposes. Since decompilation can be an expensive operation, it would be more efficient to perform it only once and reuse the result.

Consider refactoring to decompile at most once per `bytecode_hook` call, for example by storing the result in a local variable in a higher scope.