Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions vllm/compilation/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,23 +100,36 @@
"transformed_code.py")
if not os.path.exists(decompiled_file):
try:
# usually the decompilation will succeed for most models,
# as we guarantee a full-graph compilation in Dynamo.
# but there's no 100% guarantee, since decompilation is
# not a reversible process.
import depyf
src = depyf.decompile(new_code)

with open(decompiled_file, "w") as f:
f.write(src)

logger.debug("Dynamo transformed code saved to %s",
decompiled_file)
# Check if we should perform actual decompilation or write placeholder

Check failure on line 103 in vllm/compilation/wrapper.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/compilation/wrapper.py:103:81: E501 Line too long (90 > 80)
if envs.VLLM_COMPILE_DEPYF:
# Perform actual decompilation when VLLM_COMPILE_DEPYF=1
# usually the decompilation will succeed for most models,

Check failure on line 106 in vllm/compilation/wrapper.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/compilation/wrapper.py:106:81: E501 Line too long (81 > 80)
# as we guarantee a full-graph compilation in Dynamo.
# but there's no 100% guarantee, since decompilation is
# not a reversible process.
import depyf
src = depyf.decompile(new_code)

with open(decompiled_file, "w") as f:
f.write(src)

logger.debug("Dynamo transformed code saved to %s",
decompiled_file)
else:
# Write placeholder file with comment when VLLM_COMPILE_DEPYF=0 (default)

Check failure on line 119 in vllm/compilation/wrapper.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/compilation/wrapper.py:119:81: E501 Line too long (97 > 80)
placeholder_content = "# Please set VLLM_COMPILE_DEPYF=1 to populate this file\n"

Check failure on line 120 in vllm/compilation/wrapper.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/compilation/wrapper.py:120:81: E501 Line too long (105 > 80)
with open(decompiled_file, "w") as f:
f.write(placeholder_content)
logger.debug("Placeholder Dynamo transformed code saved to %s. "
"Set VLLM_COMPILE_DEPYF=1 to perform actual decompilation.",
decompiled_file)

Check failure on line 125 in vllm/compilation/wrapper.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/compilation/wrapper.py:125:81: E501 Line too long (88 > 80)
except Exception:
pass

if self.vllm_config.compilation_config.use_cudagraph and \
"update" in new_code.co_names:
# For cudagraph error checking, we always perform decompilation regardless of VLLM_COMPILE_DEPYF
# because this is a critical error checking mechanism

Check failure on line 132 in vllm/compilation/wrapper.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/compilation/wrapper.py:132:81: E501 Line too long (108 > 80)
import depyf
src = depyf.decompile(new_code)
Comment on lines 133 to 134

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This depyf.decompile call is also present earlier in this function (around line 111) for debugging purposes. Since decompilation can be an expensive operation, it would be more efficient to perform it only once and reuse the result.

Consider refactoring to decompile at most once per bytecode_hook call, for example by storing the result in a local variable in a higher scope.

msg = "Assigning / modifying buffers of nn.Module during forward pass is not allowed when using cudagraph inside the compiler because it will cause silent errors. Please use eager mode or fix the code. The following code contains clues about which buffer is being modified (please search for the usage of the function `update`):\n" + src # noqa
Comment on lines +103 to 135

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There's a potential for depyf.decompile(new_code) to be called twice: once within the if envs.VLLM_COMPILE_DEPYF: block, and again within the if self.vllm_config.compilation_config.use_cudagraph and ...: block. To avoid redundant decompilations, consider refactoring to decompile lazily and cache the result within the function scope. For example:

        if not os.path.exists(decompiled_file):
            try:
                src = None
                def _decompile_once():
                    nonlocal src
                    if src is None:
                        import depyf
                        src = depyf.decompile(new_code)
                    return src

                if envs.VLLM_COMPILE_DEPYF:
                    decompiled_src = _decompile_once()
                    with open(decompiled_file, "w") as f:
                        f.write(decompiled_src)

                    logger.debug("Dynamo transformed code saved to %s",
                                 decompiled_file)
                else:
                    placeholder_content = "# Please set VLLM_COMPILE_DEPYF=1 to populate this file\n"
                    with open(decompiled_file, "w") as f:
                        f.write(placeholder_content)
                    logger.debug("Placeholder Dynamo transformed code saved to %s. "
                                 "Set VLLM_COMPILE_DEPYF=1 to perform actual decompilation.",
                                 decompiled_file)
            except Exception:
                pass

        if self.vllm_config.compilation_config.use_cudagraph and \
            "update" in new_code.co_names:
            decompiled_src = _decompile_once()
            msg = "Assigning / modifying buffers of nn.Module during forward pass is not allowed when using cudagraph inside the compiler because it will cause silent errors. Please use eager mode or fix the code. The following code contains clues about which buffer is being modified (please search for the usage of the function `update`):\n" + decompiled_src  # noqa

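This would require declaring src and _decompile_once at function scope, before the if not os.path.exists(decompiled_file): check rather than inside it, so that they are shared between the two blocks; otherwise _decompile_once is undefined in the cudagraph check whenever the decompiled file already exists.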

        if not os.path.exists(decompiled_file):
            try:
                src = None
                def _decompile_once():
                    nonlocal src
                    if src is None:
                        import depyf
                        src = depyf.decompile(new_code)
                    return src

                if envs.VLLM_COMPILE_DEPYF:
                    decompiled_src = _decompile_once()
                    with open(decompiled_file, "w") as f:
                        f.write(decompiled_src)

                    logger.debug("Dynamo transformed code saved to %s",
                                 decompiled_file)
                else:
                    placeholder_content = "# Please set VLLM_COMPILE_DEPYF=1 to populate this file\n"
                    with open(decompiled_file, "w") as f:
                        f.write(placeholder_content)
                    logger.debug("Placeholder Dynamo transformed code saved to %s. "
                                 "Set VLLM_COMPILE_DEPYF=1 to perform actual decompilation.",
                                 decompiled_file)
            except Exception:
                pass

        if self.vllm_config.compilation_config.use_cudagraph and \
            "update" in new_code.co_names:
            decompiled_src = _decompile_once()
            msg = "Assigning / modifying buffers of nn.Module during forward pass is not allowed when using cudagraph inside the compiler because it will cause silent errors. Please use eager mode or fix the code. The following code contains clues about which buffer is being modified (please search for the usage of the function `update`):\n" + decompiled_src  # noqa

Expand Down
5 changes: 5 additions & 0 deletions vllm/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,11 @@ def get_vllm_port() -> Optional[int]:
lambda: float(os.getenv("VLLM_LOG_BATCHSIZE_INTERVAL", "-1")),
"VLLM_DISABLE_COMPILE_CACHE":
lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
# Controls whether to perform depyf decompilation during compilation
# 0 (default): Write placeholder file with comment
# 1: Perform actual depyf decompilation
"VLLM_COMPILE_DEPYF":
lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),

# If set, vllm will run in development mode, which will enable
# some additional endpoints for developing and debugging,
Expand Down
Loading