VkExpr: Implement multi-pass operations (Postfix only currently)

yuygfgg · yuygfgg · commit c13a9068d710 · 2025-12-19T20:55:01.000+08:00
diff --git a/README.md b/README.md
@@ -73,6 +73,22 @@ llvmexpr.VkExpr(clip[] clips, string[] expr[, int format, int boundary=0, int nu
 - `num_streams`: Number of concurrent Vulkan streams (default: 8). Increase this for better parallelism if you have a powerful GPU, or decrease it if you run into insufficient vram.
 - `device_id`: Selects which Vulkan physical device to run on (default: -1 = auto).
 
+### Multi-Pass Pipeline (VkExpr only)
+
+`VkExpr` supports executing multiple expressions sequentially for the same plane, with efficient zero-copy data transfer between them.
+
+- **Separator**: Use `##` to separate different stages in the expression string.
+- **Intermediate Access**: Use `bufN` to access the result of the N-th stage (0-indexed): `buf0` is the result of the first expression, `buf1` of the second, and so on. Besides the current pixel (`bufN`), relative (`bufN[x,y]`) and absolute (`bufN[]`) access to buffers is also supported.
+
+**Example:**
+```python
+# Stage 1: x + 0.5 (result stored in buf0)
+# Stage 2: x + buf0[1,1] (calculates x + (x[1,1] + 0.5))
+core.llvmexpr.VkExpr(clip, expr="x 0.5 + ## x buf0[1,1] +")
+```
+
+Intermediate buffers are stored as `float32` on the GPU, with no clamping / quantization.
+
 ### `llvmexpr.SingleExpr` (Per-Frame)
 
 This function executes an expression only once per frame. It is not suitable for typical image filtering but is powerful for tasks that involve reading from arbitrary coordinates, calculating frame-wide metrics, and writing results to other pixels or to frame properties.
diff --git a/llvmexpr/codegen/glsl/GLSLGenerator.cpp b/llvmexpr/codegen/glsl/GLSLGenerator.cpp
@@ -30,12 +30,14 @@
 
 GLSLGenerator::GLSLGenerator(
     const std::vector<Token>& tokens, int num_inputs,
-    [[maybe_unused]] int width, [[maybe_unused]] int height,
-    bool mirror_boundary,
+    int num_intermediate_inputs, [[maybe_unused]] int width,
+    [[maybe_unused]] int height, bool mirror_boundary,
     const std::map<std::pair<int, std::string>, int>& prop_map,
     const analysis::ExpressionAnalysisResults& analysis_results)
-    : tokens(tokens), num_inputs(num_inputs), mirror_boundary(mirror_boundary),
-      prop_map(prop_map), analysis(analysis_results) {
+    : tokens(tokens), num_inputs(num_inputs),
+      num_intermediate_inputs(num_intermediate_inputs),
+      mirror_boundary(mirror_boundary), prop_map(prop_map),
+      analysis(analysis_results) {
 
     const auto& var_result = analysis.getVariableUsageResult();
     for (const auto& var_name : var_result.all_vars) {
@@ -350,10 +352,22 @@ void GLSLGenerator::emitBufferDeclarations() {
         emitNewline();
     }
 
+    // Intermediate buffers
+    for (int i = 0; i < num_intermediate_inputs; ++i) {
+        emitLine(std::format("layout(std430, set = 0, binding = {}) readonly "
+                             "buffer IntermediateBuffer{} {{",
+                             num_inputs + i, i));
+        indent();
+        emitLine("float data[];");
+        dedent();
+        emitLine(std::format("}} buf{};", i));
+        emitNewline();
+    }
+
     // Output buffer
     emitLine(std::format("layout(std430, set = 0, binding = {}) writeonly "
                          "buffer OutputBuffer {{",
-                         num_inputs));
+                         num_inputs + num_intermediate_inputs));
     indent();
     emitLine("float data[];");
     dedent();
@@ -363,7 +377,7 @@ void GLSLGenerator::emitBufferDeclarations() {
     // Props buffer
     emitLine(std::format(
         "layout(std430, set = 0, binding = {}) readonly buffer PropsBuffer {{",
-        num_inputs + 1));
+        num_inputs + num_intermediate_inputs + 1));
     indent();
     emitLine("float props[];");
     dedent();
@@ -1459,6 +1473,66 @@ void GLSLGenerator::processToken(const Token& token) {
         emitLine(std::format("int {} = int(roundEven({}));", y_int, coord_y));
 
         push(emitPixelLoad(payload.clip_idx, x_int, y_int, use_mirror));
+        break;
+    }
+
+    // bufN: read the intermediate buffer at the current index `gid`.
+    case TokenType::BufferCur: {
+        const auto& payload = std::get<TokenPayloadBufferAccess>(token.payload);
+        std::string temp = newTemp();
+        emitLine(std::format("float {} = buf{}.data[gid];", temp,
+                             payload.buffer_idx));
+        push(temp);
+        break;
+    }
+    // bufN[x,y]: read at a constant offset from the current (X, Y). A
+    // per-token mode overrides mirror_boundary; use_mirror is forwarded to
+    // emitFinalCoord together with the frame width / height.
+    case TokenType::BufferRel: {
+        const auto& payload = std::get<TokenPayloadBufferAccess>(token.payload);
+        bool use_mirror =
+            payload.has_mode ? payload.use_mirror : mirror_boundary;
+
+        std::string x_expr = std::format("X + {}", payload.rel_x);
+        std::string y_expr = std::format("Y + {}", payload.rel_y);
+
+        std::string final_x =
+            emitFinalCoord(x_expr, "int(pc.width)", use_mirror);
+        std::string final_y =
+            emitFinalCoord(y_expr, "int(pc.height)", use_mirror);
+        std::string idx = emitPixelIndex(final_x, final_y);
+
+        std::string temp = newTemp();
+        emitLine(std::format("float {} = buf{}.data[{}];", temp,
+                             payload.buffer_idx, idx));
+        push(temp);
+        break;
+    }
+    // bufN[]: read at coordinates popped from the stack (y on top, x
+    // below); both are rounded with roundEven before emitFinalCoord.
+    case TokenType::BufferAbs: {
+        const auto& payload = std::get<TokenPayloadBufferAccess>(token.payload);
+        std::string coord_y = pop();
+        std::string coord_x = pop();
+        bool use_mirror =
+            payload.has_mode ? payload.use_mirror : mirror_boundary;
+
+        std::string x_int = newTemp();
+        std::string y_int = newTemp();
+
+        emitLine(std::format("int {} = int(roundEven({}));", x_int, coord_x));
+        emitLine(std::format("int {} = int(roundEven({}));", y_int, coord_y));
+
+        std::string final_x =
+            emitFinalCoord(x_int, "int(pc.width)", use_mirror);
+        std::string final_y =
+            emitFinalCoord(y_int, "int(pc.height)", use_mirror);
+        std::string idx = emitPixelIndex(final_x, final_y);
+
+        std::string temp = newTemp();
+        emitLine(std::format("float {} = buf{}.data[{}];", temp,
+                             payload.buffer_idx, idx));
+        push(temp);
         break;
     }
 
diff --git a/llvmexpr/codegen/glsl/GLSLGenerator.hpp b/llvmexpr/codegen/glsl/GLSLGenerator.hpp
@@ -34,8 +34,9 @@
 
 class GLSLGenerator {
   public:
-    GLSLGenerator(const std::vector<Token>& tokens, int num_inputs, int width,
-                  int height, bool mirror_boundary,
+    GLSLGenerator(const std::vector<Token>& tokens, int num_inputs,
+                  int num_intermediate_inputs, int width, int height,
+                  bool mirror_boundary,
                   const std::map<std::pair<int, std::string>, int>& prop_map,
                   const analysis::ExpressionAnalysisResults& analysis_results);
 
@@ -44,6 +45,7 @@ class GLSLGenerator {
   private:
     const std::vector<Token>& tokens;
     int num_inputs;
+    int num_intermediate_inputs;
     bool mirror_boundary;
     const std::map<std::pair<int, std::string>, int>& prop_map;
     const analysis::ExpressionAnalysisResults& analysis;
diff --git a/llvmexpr/frontend/Tokenizer.cpp b/llvmexpr/frontend/Tokenizer.cpp
@@ -97,15 +97,24 @@ constexpr Availability operator&(Availability lhs, Availability rhs) {
 }
 
 constexpr Availability AVAILABILITY_ALL =
-    Availability::Expr | Availability::SingleExpr;
+    Availability::Expr | Availability::SingleExpr | Availability::VkExpr;
 
+// Returns true when `availability` has the flag bit matching `mode` set;
+// an ExprMode without a matching branch below yields false.
 constexpr bool supports_mode(Availability availability, ExprMode mode) {
     if (mode == ExprMode::Expr) {
         return static_cast<std::uint8_t>(availability & Availability::Expr) !=
                0;
     }
-    return static_cast<std::uint8_t>(availability & Availability::SingleExpr) !=
-           0;
+    if (mode == ExprMode::SingleExpr) {
+        return static_cast<std::uint8_t>(availability &
+                                         Availability::SingleExpr) != 0;
+    }
+    if (mode == ExprMode::VkExpr) {
+        return static_cast<std::uint8_t>(availability & Availability::VkExpr) !=
+               0;
+    }
+    return false;
 }
 
 template <FixedString Str, TokenType Type>
@@ -552,6 +559,56 @@ inline std::optional<Token> parse_prop_store(std::string_view input) {
     return std::nullopt;
 }
 
+// Parses an intermediate-buffer access token for multi-pass VkExpr.
+//
+// Accepted spellings, each optionally followed by ":c", ":m" or ":b":
+//   bufN       -> TokenType::BufferCur
+//   bufN[x,y]  -> TokenType::BufferRel (x and y are signed integers)
+//   bufN[]     -> TokenType::BufferAbs
+//
+// Returns std::nullopt when the input does not match. The buffer index is
+// not range-checked here.
+inline std::optional<Token> parse_buffer_access(std::string_view input) {
+    if (auto m = ctre::match<
+            R"(^buf(\d+)(?:(?:(\[\]))|(?:\[\s*(-?\d+)\s*,\s*(-?\d+)\s*\]))?(?::([cmb]))?$)">(
+            input)) {
+        TokenPayloadBufferAccess data;
+        data.buffer_idx = svtoi(m.template get<1>().to_view());
+
+        TokenType type = TokenType::BufferCur;
+
+        // Group 2 is the literal "[]" (absolute access); groups 3 and 4 are
+        // the x / y offsets of a relative access.
+        if (m.template get<2>()) {
+            type = TokenType::BufferAbs;
+        } else if (m.template get<3>()) {
+            type = TokenType::BufferRel;
+            data.rel_x = svtoi(m.template get<3>().to_view());
+            data.rel_y = svtoi(m.template get<4>().to_view());
+        }
+
+        // Optional mode suffix: 'm' and 'c' record an explicit per-token
+        // mode (mirror on / off); 'b' leaves has_mode false.
+        // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
+        if (m.template get<5>()) {
+            // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
+            char mode_char = m.template get<5>().to_view()[0];
+            if (mode_char == 'm') {
+                data.has_mode = true;
+                data.use_mirror = true;
+            } else if (mode_char == 'c') {
+                data.has_mode = true;
+                data.use_mirror = false;
+            } else if (mode_char == 'b') {
+                data.has_mode = false;
+            }
+        }
+
+        return Token{.type = type, .text = std::string(input), .payload = data};
+    }
+    return std::nullopt;
+}
+
 inline std::optional<Token> parse_number(std::string_view input) {
     if (auto m = ctre::match<
             R"(^(?:(0x[0-9a-fA-F]+(?:\.[0-9a-fA-F]+(?:p[+\-]?\d+)?)?)|(0[0-7]+)|([+\-]?\d+(?:\.\d+)?(?:[eE][+\-]?\d+)?))$)">(
@@ -649,9 +693,9 @@ constexpr auto get_token_definitions() {
         make_literal_definition<FixedString{"?"}, TokenType::Ternary>(
             BEHAVIOR_TERNARY),
         make_literal_definition<FixedString{"X"}, TokenType::ConstantX>(
-            BEHAVIOR_ZERO_PUSH, Availability::Expr),
+            BEHAVIOR_ZERO_PUSH, Availability::Expr | Availability::VkExpr),
         make_literal_definition<FixedString{"Y"}, TokenType::ConstantY>(
-            BEHAVIOR_ZERO_PUSH, Availability::Expr),
+            BEHAVIOR_ZERO_PUSH, Availability::Expr | Availability::VkExpr),
         make_literal_definition<FixedString{"N"}, TokenType::ConstantN>(
             BEHAVIOR_ZERO_PUSH),
         make_literal_definition<FixedString{">="}, TokenType::Ge>(
@@ -695,7 +739,8 @@ constexpr auto get_token_definitions() {
         make_literal_definition<FixedString{"neg"}, TokenType::Neg>(
             BEHAVIOR_UNARY),
         make_literal_definition<FixedString{"@[]"}, TokenType::StoreAbs>(
-            TokenBehavior{.arity = 3, .stack_effect = -3}, Availability::Expr),
+            TokenBehavior{.arity = 3, .stack_effect = -3},
+            Availability::Expr | Availability::VkExpr),
         make_literal_definition<FixedString{"clip"}, TokenType::Clip>(
             TokenBehavior{.arity = 3, .stack_effect = -2}),
         make_literal_definition<FixedString{"sqrt"}, TokenType::Sqrt>(
@@ -742,8 +787,24 @@ constexpr auto get_token_definitions() {
             BEHAVIOR_UNARY),
         make_literal_definition<FixedString{"height"},
                                 TokenType::ConstantHeight>(BEHAVIOR_ZERO_PUSH),
+        TokenDefinition{.type = TokenType::BufferCur,
+                        .name = "bufN",
+                        .behavior = BEHAVIOR_ZERO_PUSH,
+                        .parser = parse_buffer_access,
+                        .availability = Availability::VkExpr},
+        TokenDefinition{.type = TokenType::BufferRel,
+                        .name = "bufN",
+                        .behavior = BEHAVIOR_ZERO_PUSH,
+                        .parser = parse_buffer_access,
+                        .availability = Availability::VkExpr},
+        TokenDefinition{.type = TokenType::BufferAbs,
+                        .name = "bufN",
+                        .behavior =
+                            TokenBehavior{.arity = 2, .stack_effect = -1},
+                        .parser = parse_buffer_access,
+                        .availability = Availability::VkExpr},
         make_literal_definition<FixedString{"^exit^"}, TokenType::ExitNoWrite>(
-            BEHAVIOR_ZERO_PUSH, Availability::Expr),
+            BEHAVIOR_ZERO_PUSH, Availability::Expr | Availability::VkExpr),
         make_literal_definition<FixedString{"copysign"}, TokenType::Copysign>(
             BEHAVIOR_BINARY),
         TokenDefinition{.type = TokenType::ConstantPlaneWidth,
@@ -866,7 +927,8 @@ constexpr auto get_token_definitions() {
                         .name = "clip_rel",
                         .behavior = BEHAVIOR_ZERO_PUSH,
                         .parser = parse_clip_rel,
-                        .availability = Availability::Expr},
+                        .availability =
+                            Availability::Expr | Availability::VkExpr},
         TokenDefinition{.type = TokenType::ClipAbs,
                         .name = "clip_abs",
                         .behavior =
@@ -877,7 +939,8 @@ constexpr auto get_token_definitions() {
                         .name = "clip_cur",
                         .behavior = BEHAVIOR_ZERO_PUSH,
                         .parser = parse_clip_cur,
-                        .availability = Availability::Expr},
+                        .availability =
+                            Availability::Expr | Availability::VkExpr},
         TokenDefinition{.type = TokenType::PropAccess,
                         .name = "prop_access",
                         .behavior = BEHAVIOR_ZERO_PUSH,
@@ -916,7 +979,7 @@ constexpr auto get_token_definitions() {
 } // anonymous namespace
 
 std::vector<Token> tokenize(const std::string& expr, int num_inputs,
-                            ExprMode mode) {
+                            ExprMode mode, int num_intermediate_inputs) {
     std::vector<Token> tokens;
     int idx = 0;
 
@@ -981,6 +1044,17 @@ std::vector<Token> tokenize(const std::string& expr, int num_inputs,
                     std::format("Invalid clip index in token: {} (idx {})",
                                 std::string(str_token_view), idx));
             }
+        } else if (parsed_token->type == TokenType::BufferRel ||
+                   parsed_token->type == TokenType::BufferAbs ||
+                   parsed_token->type == TokenType::BufferCur) {
+            if (std::get<TokenPayloadBufferAccess>(parsed_token->payload)
+                        .buffer_idx < 0 ||
+                std::get<TokenPayloadBufferAccess>(parsed_token->payload)
+                        .buffer_idx >= num_intermediate_inputs) {
+                throw std::runtime_error(
+                    std::format("Invalid buffer index in token: {} (idx {})",
+                                std::string(str_token_view), idx));
+            }
         }
 
         tokens.push_back(*parsed_token);
diff --git a/llvmexpr/frontend/Tokenizer.hpp b/llvmexpr/frontend/Tokenizer.hpp
@@ -62,6 +62,9 @@ enum class TokenType : std::uint8_t {
     ClipAbsPlane,  // src^plane[]
     StoreAbsPlane, // @[]^plane
     PropStore,     // prop$
+    BufferRel,     // bufN[x,y]
+    BufferAbs,     // bufN[]
+    BufferCur,     // bufN
 
     // Binary Operators
     Add,
@@ -205,13 +208,23 @@ struct TokenPayloadArrayOp {
     int static_size = 0; // ARRAY_ALLOC_STATIC
 };
 
+struct TokenPayloadBufferAccess { // payload of bufN / bufN[x,y] / bufN[]
+    int buffer_idx = 0;      // N in bufN; defaulted like the other members
+    int rel_x = 0;           // x offset, set only for bufN[x,y]
+    int rel_y = 0;           // y offset, set only for bufN[x,y]
+    bool use_mirror = false; // consulted only when has_mode is true
+    bool has_mode = false;   // true when a :m or :c suffix was given
+};
+
 struct Token {
-    using PayloadVariant = std::variant<
-        std::monostate, TokenPayloadNumber, TokenPayloadVar, TokenPayloadLabel,
-        TokenPayloadStackOp, TokenPayloadClipAccess, TokenPayloadPropAccess,
-        TokenPayloadClipAccessPlane, TokenPayloadStoreAbsPlane,
-        TokenPayloadPropStore, TokenPayloadPlaneDim, TokenPayloadClipDim,
-        TokenPayloadClipPlaneDim, TokenPayloadArrayOp>;
+    using PayloadVariant =
+        std::variant<std::monostate, TokenPayloadNumber, TokenPayloadVar,
+                     TokenPayloadLabel, TokenPayloadStackOp,
+                     TokenPayloadClipAccess, TokenPayloadPropAccess,
+                     TokenPayloadClipAccessPlane, TokenPayloadStoreAbsPlane,
+                     TokenPayloadPropStore, TokenPayloadPlaneDim,
+                     TokenPayloadClipDim, TokenPayloadClipPlaneDim,
+                     TokenPayloadArrayOp, TokenPayloadBufferAccess>;
 
     TokenType type;
     std::string text;
@@ -229,6 +242,7 @@ using BehaviorResolver = std::variant<TokenBehavior, DynamicBehaviorFn>;
 enum class ExprMode : std::uint8_t {
     Expr,
     SingleExpr,
+    VkExpr,
 };
 
 // Utility functions
@@ -250,15 +264,18 @@ struct TokenDefinition {
     enum class Availability : std::uint8_t {
         Expr = 1U << 0,
         SingleExpr = 1U << 1,
+        VkExpr = 1U << 2,
     };
 
     Availability availability = static_cast<Availability>(
         static_cast<std::uint8_t>(Availability::Expr) |
-        static_cast<std::uint8_t>(Availability::SingleExpr));
+        static_cast<std::uint8_t>(Availability::SingleExpr) |
+        static_cast<std::uint8_t>(Availability::VkExpr));
 };
 
 std::vector<Token> tokenize(const std::string& expr, int num_inputs,
-                            ExprMode mode = ExprMode::Expr);
+                            ExprMode mode = ExprMode::Expr,
+                            int num_intermediate_inputs = 0);
 TokenBehavior get_token_behavior(const Token& token);
 
 #endif // LLVMEXPR_FRONTEND_TOKENIZER_HPP
diff --git a/llvmexpr/llvmexpr.cpp b/llvmexpr/llvmexpr.cpp
diff --git a/tests/test_vk_multipass.py b/tests/test_vk_multipass.py