Skip to content

Conversation

@anlowee
Copy link

@anlowee anlowee commented Jul 11, 2025

Description

This PR adds Prestissimo support for the CLP connector. The native connector is now the default option and is responsible for deserializing classes from the Java coordinator, which are then used by Velox for query execution.

This PR also disables GCC's `-Wrestrict` warning, since it triggers a false positive with GCC 12:

gcc 12 -Wrestrict false positive
FAILED: velox/velox/connectors/clp/CMakeFiles/velox_clp_connector.dir/ClpDataSource.cpp.o 
ccache /opt/rh/gcc-toolset-12/root/bin/g++ -DBOOST_ATOMIC_DYN_LINK -DBOOST_ATOMIC_NO_LIB -DBOOST_CONTEXT_DYN_LINK -DBOOST_CONTEXT_NO_LIB -DBOOST_FILESYSTEM_DYN_LINK -DBOOST_FILESYSTEM_NO_LIB -DBOOST_PROGRAM_OPTIONS_DYN_LINK -DBOOST_PROGRAM_OPTIONS_NO_LIB -DBOOST_REGEX_DYN_LINK -DBOOST_REGEX_NO_LIB -DBOOST_SYSTEM_DYN_LINK -DBOOST_SYSTEM_NO_LIB -DBOOST_THREAD_DYN_LINK -DBOOST_THREAD_NO_LIB -DFOLLY_CFG_NO_COROUTINES -DFOLLY_HAVE_INT128_T=1 -DGFLAGS_IS_A_DLL=0 -DMSGPACK_DEFAULT_API_VERSION=3 -DMSGPACK_NO_BOOST -DPRESTO_ENABLE_REMOTE_FUNCTIONS -DSIMDJSON_THREADS_ENABLED=1 -DVELOX_ENABLE_PARQUET -I/__w/presto/presto/presto-native-execution/. -I/__w/presto/presto/presto-native-execution/velox -I/__w/presto/presto/presto-native-execution/velox/velox/external/xxhash -I/__w/presto/presto/presto-native-execution/_build/release/velox -I/__w/presto/presto/presto-native-execution/_build/release -I/__w/presto/presto/presto-native-execution/velox/. -I/__w/presto/presto/presto-native-execution/_build/release/_deps/clp-src/components/core/src/clp_s/.. 
-I/__w/presto/presto/presto-native-execution/_build/release/_deps/simdjson-src/include -I/__w/presto/presto/presto-native-execution/_build/release/_deps/xsimd-src/include -isystem /usr/local/include/proxygen -isystem /__w/presto/presto/presto-native-execution/velox/velox -isystem /__w/presto/presto/presto-native-execution/velox/velox/external -isystem /__w/presto/presto/presto-native-execution/_build/release/_deps/msgpack-cxx-src/include -isystem /__w/presto/presto/presto-native-execution/_build/release/_deps/msgpack-cxx-build/include -isystem /usr/include/libdwarf-0 -mavx2 -mfma -mavx -mf16c -mlzcnt -mbmi2 -Wno-nullability-completeness -Wno-deprecated-declarations -Wreorder -mavx2 -mfma -mavx -mf16c -mlzcnt -mbmi2 -D USE_VELOX_COMMON_BASE -D HAS_UNCAUGHT_EXCEPTIONS -DFOLLY_CFG_NO_COROUTINES -Wall -Wextra -Wno-unused        -Wno-unused-parameter        -Wno-sign-compare        -Wno-ignored-qualifiers        -Wno-implicit-fallthrough          -Wno-class-memaccess          -Wno-comment          -Wno-int-in-bool-context          -Wno-redundant-move          -Wno-array-bounds          -Wno-maybe-uninitialized          -Wno-unused-result          -Wno-format-overflow          -Wno-strict-aliasing -Werror -O3 -DNDEBUG -std=gnu++20 -fPIC -MD -MT velox/velox/connectors/clp/CMakeFiles/velox_clp_connector.dir/ClpDataSource.cpp.o -MF velox/velox/connectors/clp/CMakeFiles/velox_clp_connector.dir/ClpDataSource.cpp.o.d -o velox/velox/connectors/clp/CMakeFiles/velox_clp_connector.dir/ClpDataSource.cpp.o -c /__w/presto/presto/presto-native-execution/velox/velox/connectors/clp/ClpDataSource.cpp
In file included from /opt/rh/gcc-toolset-12/root/usr/include/c++/12/ios:40,
                 from /opt/rh/gcc-toolset-12/root/usr/include/c++/12/ostream:38,
                 from /opt/rh/gcc-toolset-12/root/usr/include/c++/12/bits/unique_ptr.h:41,
                 from /opt/rh/gcc-toolset-12/root/usr/include/c++/12/memory:76,
                 from /usr/local/include/folly/Function.h:205,
                 from /usr/local/include/folly/CancellationToken.h:20,
                 from /__w/presto/presto/presto-native-execution/velox/velox/connectors/Connector.h:18,
                 from /__w/presto/presto/presto-native-execution/velox/velox/connectors/clp/ClpColumnHandle.h:19,
                 from /__w/presto/presto/presto-native-execution/velox/velox/connectors/clp/ClpDataSource.cpp:19:
In static member function 'static constexpr std::char_traits<char>::char_type* std::char_traits<char>::copy(char_type*, const char_type*, std::size_t)',
    inlined from 'static constexpr void std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_S_copy(_CharT*, const _CharT*, size_type) [with _CharT = char; _Traits = std::char_traits<char>; _Alloc = std::allocator<char>]' at /opt/rh/gcc-toolset-12/root/usr/include/c++/12/bits/basic_string.h:423:21,
    inlined from 'constexpr std::__cxx11::basic_string<_CharT, _Traits, _Allocator>& std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_M_replace(size_type, size_type, const _CharT*, size_type) [with _CharT = char; _Traits = std::char_traits<char>; _Alloc = std::allocator<char>]' at /opt/rh/gcc-toolset-12/root/usr/include/c++/12/bits/basic_string.tcc:532:22,
    inlined from 'constexpr std::__cxx11::basic_string<_CharT, _Traits, _Alloc>& std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::assign(const _CharT*) [with _CharT = char; _Traits = std::char_traits<char>; _Alloc = std::allocator<char>]' at /opt/rh/gcc-toolset-12/root/usr/include/c++/12/bits/basic_string.h:1647:19,
    inlined from 'constexpr std::__cxx11::basic_string<_CharT, _Traits, _Alloc>& std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::operator=(const _CharT*) [with _CharT = char; _Traits = std::char_traits<char>; _Alloc = std::allocator<char>]' at /opt/rh/gcc-toolset-12/root/usr/include/c++/12/bits/basic_string.h:815:28,
    inlined from 'facebook::velox::connector::clp::ClpDataSource::ClpDataSource(const facebook::velox::RowTypePtr&, const std::shared_ptr<facebook::velox::connector::ConnectorTableHandle>&, const std::unordered_map<std::__cxx11::basic_string<char>, std::shared_ptr<facebook::velox::connector::ColumnHandle> >&, facebook::velox::memory::MemoryPool*, std::shared_ptr<const facebook::velox::connector::clp::ClpConfig>&)' at /__w/presto/presto/presto-native-execution/velox/velox/connectors/clp/ClpDataSource.cpp:43:17:
/opt/rh/gcc-toolset-12/root/usr/include/c++/12/bits/char_traits.h:431:56: error: 'void* __builtin_memcpy(void*, const void*, long unsigned int)' accessing 9223372036854775810 or more bytes at offsets -4611686018427387902 and [-4611686018427387903, 4611686018427387904] may overlap up to 9223372036854775813 bytes at offset -3 [-Werror=restrict]
  431 |         return static_cast<char_type*>(__builtin_memcpy(__s1, __s2, __n));
      |                                        ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~

This PR also updates the path to license-header.py in java-to-struct-json.py, since y-scope/velox#11 refactored Velox's pre-commit checks and moved that script.

Checklist

  • The PR satisfies the contribution guidelines.
  • This is a breaking change and that has been indicated in the PR title, OR this isn't a
    breaking change.
  • Necessary docs have been updated, OR no docs need to be updated.

Validation performed

  1. Passed the CI.
  2. Ran the end-to-end test on the MongoDB-256MB dataset.

Summary by CodeRabbit

Summary by CodeRabbit

  • New Features

    • Added support for the "clp" connector with comprehensive protocol structures, registration, and conversion implementations.
    • Enabled JSON serialization and deserialization for CLP connector protocol types, including polymorphism and enum handling.
    • Integrated the CLP connector into build, test systems, and protocol generation workflows.
  • Chores

    • Updated build configurations and scripts to include the CLP connector and its dependencies.
    • Upgraded the Velox submodule to a newer commit.
  • Style

    • Expanded disabled compiler warnings for improved build consistency.

@coderabbitai
Copy link

coderabbitai bot commented Jul 11, 2025

## Walkthrough

A new "clp" connector is integrated into Presto Native Execution. This includes protocol definitions, C++ classes, JSON (de)serialization, registration logic, and build system updates. The changes introduce new source, header, schema, and template files, update the protocol generation workflow, and ensure the "clp" connector is available in both production and test builds.

## Changes

| File(s)                                                                                      | Change Summary                                                                                                   |
|---------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------|
| presto-native-execution/CMakeLists.txt                                                      | Added `-Wno-restrict` to disabled compiler warnings with explanatory comment.                                   |
| presto-native-execution/presto_cpp/main/CMakeLists.txt<br>presto_cpp/main/connectors/CMakeLists.txt<br>presto_cpp/main/tests/CMakeLists.txt<br>presto_cpp/main/types/tests/CMakeLists.txt | Linked `velox_clp_connector` library to server, connector, and test targets.                                     |
| presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.cpp<br>presto_cpp/main/connectors/PrestoToVeloxConnector.h | Implemented and declared `ClpPrestoToVeloxConnector`, with methods for split, column, and table handle conversion.|
| presto-native-execution/presto_cpp/main/connectors/Registration.cpp                         | Registered the "clp" connector and its factory in the connector registry.                                        |
| presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxConnectorTest.cpp          | Added "clp" connector to test registration and verification.                                                     |
| presto-native-execution/presto_cpp/presto_protocol/Makefile                                 | Integrated "clp" connector into protocol code and schema generation workflow.                                    |
| presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h     | Added type alias for `ClpConnectorProtocol` specialization.                                                      |
| presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h<br>presto_protocol_clp.cpp | Defined and implemented CLP connector protocol structs and their JSON (de)serialization.                         |
| presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json<br>presto_protocol_clp.yml | Added schema and YAML config for CLP protocol entities and subclass mappings.                                    |
| presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache<br>presto_protocol-json-hpp.mustache | Added Mustache templates for generating JSON (de)serialization for CLP protocol types.                           |
| presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc<br>special/ClpTransactionHandle.hpp.inc<br>special/ClpTransactionHandle.cpp.inc | Added special-case header and source for CLP column and transaction handle (de)serialization.                    |
| presto-native-execution/presto_cpp/presto_protocol/java-to-struct-json.py                   | Updated license header removal script path.                                                                      |
| presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp<br>presto_protocol.h | Included CLP protocol implementation and header.                                                                 |
| presto-native-execution/velox                                                               | Updated subproject commit reference.                                                                             |

## Sequence Diagram(s)

```mermaid
sequenceDiagram
    participant PrestoServer
    participant ConnectorRegistry
    participant ClpPrestoToVeloxConnector
    participant VeloxCLPConnector
    participant Protocol

    PrestoServer->>ConnectorRegistry: registerConnectors()
    ConnectorRegistry->>ClpPrestoToVeloxConnector: Instantiate
    ClpPrestoToVeloxConnector->>VeloxCLPConnector: Register with factory
    PrestoServer->>ClpPrestoToVeloxConnector: toVeloxSplit/ColumnHandle/TableHandle
    ClpPrestoToVeloxConnector->>Protocol: Convert Presto protocol objects to Velox objects
    Protocol-->>ClpPrestoToVeloxConnector: Return Velox connector objects
    ClpPrestoToVeloxConnector-->>PrestoServer: Return Velox objects
```

<!-- walkthrough_end -->


---

<details>
<summary>📜 Recent review details</summary>

**Configuration used: .coderabbit.yaml**
**Review profile: ASSERTIVE**
**Plan: Pro**


<details>
<summary>📥 Commits</summary>

Reviewing files that changed from the base of the PR and between 46155a72a57575061cf905f0eb08ddeac92bb564 and ca813fab5b5c6b711ea75c4563eea9ac4f085901.

</details>

<details>
<summary>📒 Files selected for processing (2)</summary>

* `presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt` (1 hunks)
* `presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxConnectorTest.cpp` (1 hunks)

</details>

<details>
<summary>🧰 Additional context used</summary>

<details>
<summary>🧠 Learnings (1)</summary>

<details>
<summary>📓 Common learnings</summary>

Learnt from: wraymo
PR: #15
File: presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java:22-33
Timestamp: 2025-06-13T12:56:06.325Z
Learning: ClpMetadataProvider is instantiated only once and used solely by the Presto coordinator, so concurrency/thread-safety guarantees are unnecessary.


</details>

</details>

</details>

<details>
<summary>⏰ Context from checks skipped due to timeout of 90000ms. You can increase the timeout in your CodeRabbit configuration to a maximum of 15 minutes (900000ms). (15)</summary>

* GitHub Check: test (17.0.13, :presto-main)
* GitHub Check: test (17.0.13, :presto-tests -P ci-only-tpch-distributed-queries)
* GitHub Check: test (17.0.13, :presto-tests -P ci-only-resource-manager)
* GitHub Check: test (17.0.13, :presto-tests -P ci-only-plan-determinism)
* GitHub Check: test (17.0.13, :presto-tests -P ci-only-distributed-non-hash-gen)
* GitHub Check: test (17.0.13, :presto-tests -P ci-only-local-queries)
* GitHub Check: test (8.0.442, :presto-tests -P ci-only-aggregation-queries)
* GitHub Check: test (8.0.442, :presto-main-base)
* GitHub Check: test (8.0.442, :presto-tests -P ci-only-tpch-distributed-queries)
* GitHub Check: test (8.0.442, :presto-tests -P ci-only-plan-determinism)
* GitHub Check: test (8.0.442, :presto-tests -P presto-tests-general)
* GitHub Check: test (8.0.442, :presto-tests -P presto-tests-execution-memory)
* GitHub Check: maven-checks (8.0.442)
* GitHub Check: maven-checks (17.0.13)
* GitHub Check: prestocpp-linux-build-for-test

</details>

<details>
<summary>🔇 Additional comments (1)</summary><blockquote>

<details>
<summary>presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxConnectorTest.cpp (1)</summary>

`26-27`: **LGTM! The CLP connector test follows the correct pattern.**

The implementation correctly follows the existing pattern for testing connectors and maintains alphabetical order in the connector list.

</details>

</blockquote></details>

</details>
<!-- internal state start -->


<!--  -->

<!-- internal state end -->
<!-- finishing_touch_checkbox_start -->

<details open="true">
<summary>✨ Finishing Touches</summary>

- [ ] <!-- {"checkboxId": "7962f53c-55bc-4827-bfbf-6a18da830691"} --> 📝 Generate Docstrings

</details>

<!-- finishing_touch_checkbox_end -->
<!-- tips_start -->

---

Thanks for using CodeRabbit! It's free for OSS, and your support helps us grow. If you like it, consider giving us a shout-out.

<details>
<summary>❤️ Share</summary>

- [X](https://twitter.com/intent/tweet?text=I%20just%20used%20%40coderabbitai%20for%20my%20code%20review%2C%20and%20it%27s%20fantastic%21%20It%27s%20free%20for%20OSS%20and%20offers%20a%20free%20trial%20for%20the%20proprietary%20code.%20Check%20it%20out%3A&url=https%3A//coderabbit.ai)
- [Mastodon](https://mastodon.social/share?text=I%20just%20used%20%40coderabbitai%20for%20my%20code%20review%2C%20and%20it%27s%20fantastic%21%20It%27s%20free%20for%20OSS%20and%20offers%20a%20free%20trial%20for%20the%20proprietary%20code.%20Check%20it%20out%3A%20https%3A%2F%2Fcoderabbit.ai)
- [Reddit](https://www.reddit.com/submit?title=Great%20tool%20for%20code%20review%20-%20CodeRabbit&text=I%20just%20used%20CodeRabbit%20for%20my%20code%20review%2C%20and%20it%27s%20fantastic%21%20It%27s%20free%20for%20OSS%20and%20offers%20a%20free%20trial%20for%20proprietary%20code.%20Check%20it%20out%3A%20https%3A//coderabbit.ai)
- [LinkedIn](https://www.linkedin.com/sharing/share-offsite/?url=https%3A%2F%2Fcoderabbit.ai&mini=true&title=Great%20tool%20for%20code%20review%20-%20CodeRabbit&summary=I%20just%20used%20CodeRabbit%20for%20my%20code%20review%2C%20and%20it%27s%20fantastic%21%20It%27s%20free%20for%20OSS%20and%20offers%20a%20free%20trial%20for%20proprietary%20code)

</details>

<details>
<summary>🪧 Tips</summary>

### Chat

There are 3 ways to chat with [CodeRabbit](https://coderabbit.ai?utm_source=oss&utm_medium=github&utm_campaign=y-scope/presto&utm_content=34):

- Review comments: Directly reply to a review comment made by CodeRabbit. Example:
  - `I pushed a fix in commit <commit_id>, please review it.`
  - `Explain this complex logic.`
  - `Open a follow-up GitHub issue for this discussion.`
- Files and specific lines of code (under the "Files changed" tab): Tag `@coderabbitai` in a new review comment at the desired location with your query. Examples:
  - `@coderabbitai explain this code block.`
  - `@coderabbitai modularize this function.`
- PR comments: Tag `@coderabbitai` in a new PR comment to ask questions about the PR branch. For the best results, please provide a very specific query, as very limited context is provided in this mode. Examples:
  - `@coderabbitai gather interesting stats about this repository and render them as a table. Additionally, render a pie chart showing the language distribution in the codebase.`
  - `@coderabbitai read src/utils.ts and explain its main purpose.`
  - `@coderabbitai read the files in the src/scheduler package and generate a class diagram using mermaid and a README in the markdown format.`
  - `@coderabbitai help me debug CodeRabbit configuration file.`

### Support

Need help? Create a ticket on our [support page](https://www.coderabbit.ai/contact-us/support) for assistance with any issues or questions.

Note: Be mindful of the bot's finite context window. It's strongly recommended to break down tasks such as reading entire modules into smaller chunks. For a focused discussion, use review comments to chat about specific files and their changes, instead of using the PR comments.

### CodeRabbit Commands (Invoked using PR comments)

- `@coderabbitai pause` to pause the reviews on a PR.
- `@coderabbitai resume` to resume the paused reviews.
- `@coderabbitai review` to trigger an incremental review. This is useful when automatic reviews are disabled for the repository.
- `@coderabbitai full review` to do a full review from scratch and review all the files again.
- `@coderabbitai summary` to regenerate the summary of the PR.
- `@coderabbitai generate docstrings` to [generate docstrings](https://docs.coderabbit.ai/finishing-touches/docstrings) for this PR.
- `@coderabbitai generate sequence diagram` to generate a sequence diagram of the changes in this PR.
- `@coderabbitai resolve` to resolve all the CodeRabbit review comments.
- `@coderabbitai configuration` to show the current CodeRabbit configuration for the repository.
- `@coderabbitai help` to get help.

### Other keywords and placeholders

- Add `@coderabbitai ignore` anywhere in the PR description to prevent this PR from being reviewed.
- Add `@coderabbitai summary` to generate the high-level summary at a specific location in the PR description.
- Add `@coderabbitai` anywhere in the PR title to generate the title automatically.

### Documentation and Community

- Visit our [Documentation](https://docs.coderabbit.ai) for detailed information on how to use CodeRabbit.
- Join our [Discord Community](http://discord.gg/coderabbit) to get help, request features, and share feedback.
- Follow us on [X/Twitter](https://twitter.com/coderabbitai) for updates and announcements.

</details>

<!-- tips_end -->

@anlowee anlowee requested a review from kirkrodrigues July 11, 2025 17:03
Copy link

@coderabbitai coderabbitai bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actionable comments posted: 14

🔭 Outside diff range comments (1)
presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt (1)

20-22: Unconditionally linking velox_clp_connector can break builds where CLP isn’t compiled

Unlike Arrow-Flight, this connector isn’t guarded with a feature flag. If a consumer builds Velox without velox_clp_connector, the link will fail.

Suggested adjustment:

-if(PRESTO_ENABLE_ARROW_FLIGHT_CONNECTOR)
+if(PRESTO_ENABLE_CLP_CONNECTOR)
+  target_link_libraries(presto_connectors velox_clp_connector)
+endif()
+
+if(PRESTO_ENABLE_ARROW_FLIGHT_CONNECTOR)
   add_subdirectory(arrow_flight)
   target_link_libraries(presto_connectors presto_flight_connector)
 endif()
-
-target_link_libraries(presto_connectors presto_velox_expr_conversion
-                      velox_type_fbhive velox_clp_connector)
+target_link_libraries(presto_connectors presto_velox_expr_conversion
+                      velox_type_fbhive)

This mirrors the existing Arrow-Flight pattern and keeps optional builds working.

📜 Review details

Configuration used: .coderabbit.yaml
Review profile: ASSERTIVE
Plan: Pro

📥 Commits

Reviewing files that changed from the base of the PR and between 9c86114 and 50563ee.

📒 Files selected for processing (24)
  • presto-native-execution/CMakeLists.txt (1 hunks)
  • presto-native-execution/presto_cpp/main/CMakeLists.txt (1 hunks)
  • presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt (1 hunks)
  • presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.cpp (3 hunks)
  • presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.h (2 hunks)
  • presto-native-execution/presto_cpp/main/connectors/Registration.cpp (3 hunks)
  • presto-native-execution/presto_cpp/main/tests/CMakeLists.txt (1 hunks)
  • presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt (3 hunks)
  • presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxConnectorTest.cpp (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/Makefile (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/java-to-struct-json.py (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp (1 hunks)
  • presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h (1 hunks)
  • presto-native-execution/velox (1 hunks)
🧰 Additional context used
🧠 Learnings (3)
📓 Common learnings
Learnt from: wraymo
PR: y-scope/presto#15
File: presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java:22-33
Timestamp: 2025-06-13T12:56:06.325Z
Learning: `ClpMetadataProvider` is instantiated only once and used solely by the Presto coordinator, so concurrency/thread-safety guarantees are unnecessary.
presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h (1)
Learnt from: wraymo
PR: y-scope/presto#15
File: presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java:22-33
Timestamp: 2025-06-13T12:56:06.325Z
Learning: `ClpMetadataProvider` is instantiated only once and used solely by the Presto coordinator, so concurrency/thread-safety guarantees are unnecessary.
presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml (1)
Learnt from: wraymo
PR: y-scope/presto#15
File: presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java:22-33
Timestamp: 2025-06-13T12:56:06.325Z
Learning: `ClpMetadataProvider` is instantiated only once and used solely by the Presto coordinator, so concurrency/thread-safety guarantees are unnecessary.
🧬 Code Graph Analysis (2)
presto-native-execution/presto_cpp/main/connectors/Registration.cpp (2)
presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.h (1)
  • registerPrestoToVeloxConnector (30-31)
presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.cpp (2)
  • registerPrestoToVeloxConnector (49-59)
  • registerPrestoToVeloxConnector (49-50)
presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp (1)
presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h (4)
  • ClpColumnHandle (43-43)
  • ClpSplit (56-56)
  • ClpTableHandle (66-66)
  • ClpTableLayoutHandle (77-77)
🪛 YAMLlint (1.37.1)
presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml

[error] 18-18: too many spaces inside braces

(braces)


[error] 18-18: too many spaces after comma

(commas)


[error] 18-18: too many spaces inside braces

(braces)


[error] 23-23: too many spaces inside braces

(braces)


[error] 23-23: too many spaces after comma

(commas)


[error] 23-23: too many spaces inside braces

(braces)


[error] 28-28: too many spaces inside braces

(braces)


[error] 28-28: too many spaces after comma

(commas)


[error] 28-28: too many spaces inside braces

(braces)


[error] 33-33: too many spaces inside braces

(braces)


[error] 33-33: too many spaces after comma

(commas)


[error] 33-33: too many spaces inside braces

(braces)

⏰ Context from checks skipped due to timeout of 90000ms. You can increase the timeout in your CodeRabbit configuration to a maximum of 15 minutes (900000ms). (18)
  • GitHub Check: test (8.0.442, :presto-main)
  • GitHub Check: test (17.0.13, :presto-main-base)
  • GitHub Check: test (17.0.13, :presto-tests -P ci-only-distributed-non-hash-gen)
  • GitHub Check: test (17.0.13, :presto-tests -P ci-only-plan-determinism)
  • GitHub Check: test (17.0.13, :presto-tests -P ci-only-resource-manager)
  • GitHub Check: test (17.0.13, :presto-tests -P presto-tests-execution-memory)
  • GitHub Check: test (8.0.442, :presto-main-base)
  • GitHub Check: test (17.0.13, :presto-main)
  • GitHub Check: test (17.0.13, :presto-tests -P ci-only-aggregation-queries)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-aggregation-queries)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-distributed-non-hash-gen)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-local-queries)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-plan-determinism)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-distributed-queries)
  • GitHub Check: test (8.0.442, :presto-tests -P presto-tests-general)
  • GitHub Check: maven-checks (17.0.13)
  • GitHub Check: maven-checks (8.0.442)
  • GitHub Check: prestocpp-linux-build-for-test
🔇 Additional comments (32)
presto-native-execution/presto_cpp/main/CMakeLists.txt (1)

42-59: Verify velox_clp_connector is a proper library target and its dependencies are wired in

  • File: presto-native-execution/presto_cpp/main/CMakeLists.txt (lines 42–59)
  • There’s no local add_library(velox_clp_connector …) in this repo—this target must come from Velox’s exported CMake config.
  • Confirm in Velox that velox_clp_connector is exported as a STATIC or SHARED library (not an OBJECT/INTERFACE target).
  • Ensure all of its transitive dependencies (e.g. velox_dwio_*, velox_core) are pulled in.
  • If it turns out to be an OBJECT or INTERFACE target, either add
    add_dependencies(presto_server_lib velox_clp_connector)
    (as you do for velox_tpch_connector) or update Velox’s CMake to export it properly.
presto-native-execution/velox (1)

1-1: Floating Velox commit not on any tag or branch – please verify

No tags or remote branches contain commit 749d28944723ad746b60ca7073f42b3df9896ff4. Before merging, confirm that:

  • This commit is reachable via an immutable tag or long-lived branch expected by downstream builds.
  • CI passes for all existing connectors (not just CLP) against this Velox revision.
  • Any new APIs used by the native CLP connector were introduced in this commit or later, so older tags remain compatible.
presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp (1)

18-22: Addition is fine, but remember this compilation unit is marked deprecated

presto_protocol.cpp is explicitly scheduled for removal. Adding the CLP .cpp include here keeps the build green today, but the connector will silently vanish when this file is finally dropped—unless the generation template that replaces it also includes CLP.

Please double-check:

  1. The new code-gen path (the chevron template) contains the same include.
  2. There are no duplicate translation units for presto_protocol_clp.cpp, which could trigger ODR violations.
presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h (1)

19-22: Same caveat as the .cpp aggregator

Good to see the header aggregator updated, but make sure the upcoming replacement header (once this deprecated file is removed) also exports presto_protocol_clp.h. Otherwise downstream code will break.

presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxConnectorTest.cpp (1)

36-37: LGTM! Consistent test coverage for the new CLP connector.

The addition follows the established pattern for connector testing and ensures the CLP connector is properly covered by the existing test infrastructure.

presto-native-execution/presto_cpp/presto_protocol/java-to-struct-json.py (1)

173-173: Verified: license-header script path is correct

The updated path (../../velox/scripts/checks/license-header.py) resolves to presto-native-execution/velox/scripts/checks/license-header.py, which exists in the repo. No further changes are needed.

presto-native-execution/presto_cpp/main/connectors/Registration.cpp (3)

22-22: LGTM! Proper header inclusion for CLP connector.

The include follows the established pattern for connector headers.


49-53: LGTM! Consistent connector factory registration pattern.

The CLP connector factory registration follows the same pattern as other connectors, including the proper duplicate check using hasConnectorFactory().


84-85: LGTM! Proper PrestoToVeloxConnector registration.

The ClpPrestoToVeloxConnector registration follows the established pattern, using the factory's connector name constant for consistency.

presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc (1)

20-24: LGTM! JSON serialization follows the expected enum pattern.

The to_json function correctly serializes the ClpTransactionHandle as a JSON array containing the type and instance, which is appropriate for enum-like structures.

presto-native-execution/presto_cpp/presto_protocol/Makefile (3)

55-60: LGTM! Consistent build integration for CLP connector.

The build rules for CLP connector structs follow the established pattern used by other connectors, ensuring consistency in the build process.


68-68: LGTM! Proper JSON generation integration.

The CLP connector is correctly integrated into the JSON generation process, maintaining consistency with other connectors.


76-76: LGTM! Proper protobuf generation integration.

The CLP connector is correctly integrated into the protobuf generation process, maintaining consistency with other connectors.

presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc (3)

15-16: Good documentation of special handling.

The comment clearly explains why this handle requires special treatment due to its Java counterpart being an enum.


20-22: LGTM! Proper struct definition.

The struct correctly inherits from ConnectorTransactionHandle and has appropriate member initialization.


24-26: LGTM! Proper JSON serialization declarations.

The JSON serialization function declarations follow the established pattern used throughout the codebase.

presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc (3)

19-24: LGTM! Well-structured column handle definition.

The struct members are appropriately defined with proper types and default initialization.


31-32: LGTM! Proper JSON serialization declarations.

The JSON serialization function declarations follow the established pattern used throughout the codebase.


27-29: No change needed for dynamic_cast in ClpColumnHandle::operator<()

All connector implementations (Arrow, Hive, Iceberg, Tpch and Clp) use the same dynamic_cast-based pattern in their operator<() overrides, so this is a consistent design choice rather than an isolated issue. No modifications are required here.

presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h (1)

19-28: LGTM! Proper connector protocol definition.

The ClpConnectorProtocol type alias correctly specifies CLP-specific types and appropriately uses NotImplemented placeholders for unsupported features, following the established pattern.

presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.h (2)

16-16: LGTM! Include reordering for consistency.

The include reordering improves code organization and consistency.


227-252: LGTM! Well-structured CLP connector class.

The ClpPrestoToVeloxConnector class properly follows the established pattern of other connector classes, implementing all required virtual methods and correctly forwarding constructor parameters to the base class.

presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache (1)

29-31: Ignore hard-coded ClpTransactionHandle in this template
The JSON header templates for all connectors (Hive, TPCH, ArrowFlight, etc.) likewise include their transaction‐handle definitions directly. Having ClpTransactionHandle here is intentional and aligns with the existing pattern needed for correct JSON (de)serialization. No change required.

Likely an incorrect or invalid review comment.

presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json (1)

1-114: LGTM! Well-structured JSON schema for CLP connector protocol.

The JSON schema definitions are well-formed and follow established patterns. The field types and inheritance relationships are appropriately defined for the CLP connector protocol structures.

presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.cpp (6)

18-18: LGTM! Proper inclusion of CLP connector headers.

The new includes for CLP protocol and Velox connector components are correctly added following the established pattern of other connectors.

Also applies to: 24-26


1558-1568: LGTM! CLP split conversion implementation follows established patterns.

The toVeloxSplit method correctly converts CLP protocol splits to Velox connector splits with proper error handling and type validation.


1570-1582: LGTM! CLP column handle conversion is well-implemented.

The toVeloxColumnHandle method properly converts CLP protocol column handles to Velox column handles, using the type parser as expected.


1584-1603: LGTM! CLP table handle conversion follows connector patterns.

The toVeloxTableHandle method correctly extracts the table layout handle and creates the appropriate Velox table handle with proper validation.


1605-1608: LGTM! CLP connector protocol creation is consistent.

The createConnectorProtocol method follows the standard pattern established by other connectors.


1419-1424: LGTM! Improved formatting in Iceberg connector.

The formatting improvement with proper variable declaration and initialization enhances code readability.

presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp (1)

1-158: LGTM! Generated JSON serialization code follows established patterns.

The generated C++ code provides consistent JSON serialization/deserialization for all CLP protocol types. The default constructors properly set the type discriminator, and the serialization functions correctly handle all fields using the established helper function patterns.

presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h (1)

1-2: Ensure manual operator< implementations are correctly preserved by the generator

The generated header contains inlined manual code for operator< (lines 34 and 45), and there’s a separate inc file under special/ClpColumnHandle.hpp.inc (lines 16 and 27) holding the same implementation. However, presto_protocol_clp.h does not reference or include that inc file at build time.

Please verify that your Chevron-based generation pipeline actually inlines or merges special/ClpColumnHandle.hpp.inc when regenerating this header to avoid losing manual changes. If it does not, consider:

  • Moving all manual operator< code into special/ClpColumnHandle.hpp.inc only
  • Updating the Mustache template or generation script to explicitly pull in that inc file

Files to check:

  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h (manual code at lines 34, 45)
  • presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc (manual code at lines 16, 27)

Comment on lines 48 to 52
velox_hive_connector
velox_tpch_connector
velox_clp_connector
velox_presto_serializer
velox_functions_prestosql
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Same optional-build issue for test target

presto_server_test now always links against velox_clp_connector. Add the same if(PRESTO_ENABLE_CLP_CONNECTOR) guard to avoid link failures when CLP isn’t present.

🤖 Prompt for AI Agents
In presto-native-execution/presto_cpp/main/tests/CMakeLists.txt around lines 48
to 52, the test target presto_server_test always links against
velox_clp_connector, which causes link failures if the CLP connector is not
enabled. Wrap the addition of velox_clp_connector in an
if(PRESTO_ENABLE_CLP_CONNECTOR) conditional block to ensure it is only linked
when the CLP connector is present, preventing build errors.

Comment on lines 28 to 32
velox_hive_connector
velox_tpch_connector
velox_clp_connector
velox_exec
velox_dwio_common_exception
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Guard CLP linkage in presto_velox_split_test

Add a feature flag guard around velox_clp_connector just like the main connector target.

🤖 Prompt for AI Agents
In presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt around
lines 28 to 32, the dependency on velox_clp_connector in the
presto_velox_split_test target is not guarded by a feature flag. Add a
conditional check for the CLP feature flag around the inclusion of
velox_clp_connector, similar to how it is done in the main connector target, to
ensure the linkage only occurs when the feature is enabled.

Comment on lines 99 to 103
velox_hive_connector
velox_tpch_connector
velox_clp_connector
GTest::gtest
GTest::gtest_main)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Guard CLP linkage in presto_to_velox_connector_test

Mirror the conditional linkage pattern so builds that exclude CLP stay healthy.

🤖 Prompt for AI Agents
In presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt around
lines 99 to 103, the target_link_libraries for presto_to_velox_connector_test
always includes velox_clp_connector, which can cause build failures if CLP is
excluded. Modify the CMakeLists.txt to conditionally link velox_clp_connector
only if CLP is enabled, mirroring the existing pattern used elsewhere to guard
CLP linkage and ensure builds without CLP remain healthy.

Comment on lines 66 to 70
velox_hive_connector
velox_tpch_connector
velox_clp_connector
velox_hive_partition_function
velox_presto_serializer
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Guard CLP linkage in presto_expressions_test

Same optional-build concern as above.

🤖 Prompt for AI Agents
In presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt around
lines 66 to 70, the linkage of the velox_clp_connector in the
presto_expressions_test target is not guarded for optional builds. Modify the
CMakeLists.txt to conditionally link velox_clp_connector only if the CLP
component is enabled, using appropriate CMake conditional statements to prevent
build errors when CLP is not included.

Comment on lines +34 to +36
"-Wno-nullability-completeness \
-Wno-deprecated-declarations \
-Wno-restrict")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Verify the necessity of suppressing the -Wrestrict warning.

The addition of -Wno-restrict to the disabled warnings list suggests that the new CLP connector code triggers this warning. Please verify that this warning suppression is necessary and doesn't hide important issues.


🏁 Script executed:

#!/bin/bash
# Description: Check if the CLP connector code triggers -Wrestrict warnings
# Expected: Find instances where this warning would be triggered

# Search for patterns that commonly trigger -Wrestrict warnings in CLP-related files
echo "Searching for potential -Wrestrict triggers in CLP connector code..."
rg -A 3 -B 3 "strcpy|strncpy|sprintf|memcpy|memmove" --type cpp --glob "*clp*" || true
rg -A 3 -B 3 "restrict|__restrict" --type cpp --glob "*clp*" || true

Length of output: 304


Remove unnecessary -Wno-restrict suppression

No usage patterns (e.g. strcpy, memcpy, or pointer qualifiers like restrict) were found in the CLP connector code that would trigger a -Wrestrict warning. To avoid hiding potential issues, please remove the -Wno-restrict entry.

• File to update: presto-native-execution/CMakeLists.txt (around lines 34–36)
• Suggested diff:

-    "-Wno-nullability-completeness \
-     -Wno-deprecated-declarations \
-     -Wno-restrict")
+    "-Wno-nullability-completeness \
+     -Wno-deprecated-declarations")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
"-Wno-nullability-completeness \
-Wno-deprecated-declarations \
-Wno-restrict")
"-Wno-nullability-completeness \
-Wno-deprecated-declarations")
🤖 Prompt for AI Agents
In presto-native-execution/CMakeLists.txt around lines 34 to 36, remove the
compiler flag '-Wno-restrict' from the list of warning suppressions since no
code in the CLP connector triggers this warning. This will prevent hiding
potential issues related to restrict usage.

{{/struct}}
{{#enum}}
namespace facebook::presto::protocol::clp {
//Loosly copied this here from NLOHMANN_JSON_SERIALIZE_ENUM()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Fix typo in comment.

Minor typo in the comment.

-    //Loosly copied this here from NLOHMANN_JSON_SERIALIZE_ENUM()
+    //Loosely copied this here from NLOHMANN_JSON_SERIALIZE_ENUM()
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
//Loosly copied this here from NLOHMANN_JSON_SERIALIZE_ENUM()
//Loosely copied this here from NLOHMANN_JSON_SERIALIZE_ENUM()
🤖 Prompt for AI Agents
In
presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache
at line 75, fix the typo in the comment by changing "Loosly" to "Loosely" to
correct the spelling.

j[0].get_to(p._type);
j[1].get_to(p.instance);
}
} // namespace facebook::presto::protocol
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix incorrect namespace in closing comment.

The namespace closing comment doesn't match the actual namespace.

-} // namespace facebook::presto::protocol
+} // namespace facebook::presto::protocol::clp
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
} // namespace facebook::presto::protocol
} // namespace facebook::presto::protocol::clp
🤖 Prompt for AI Agents
In
presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache
at line 39, the closing comment for the namespace is incorrect. Update the
comment to match the actual namespace being closed to ensure clarity and
correctness.

Comment on lines +35 to +38
void from_json(const json& j, ClpTransactionHandle& p) {
j[0].get_to(p._type);
j[1].get_to(p.instance);
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add bounds checking for JSON array deserialization.

The from_json function assumes the JSON array has at least 2 elements without validation, which could cause out-of-bounds access.

 void from_json(const json& j, ClpTransactionHandle& p) {
+    if (!j.is_array() || j.size() < 2) {
+        throw ParseError("ClpTransactionHandle requires JSON array with at least 2 elements");
+    }
     j[0].get_to(p._type);
     j[1].get_to(p.instance);
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
void from_json(const json& j, ClpTransactionHandle& p) {
j[0].get_to(p._type);
j[1].get_to(p.instance);
}
void from_json(const json& j, ClpTransactionHandle& p) {
if (!j.is_array() || j.size() < 2) {
throw ParseError("ClpTransactionHandle requires JSON array with at least 2 elements");
}
j[0].get_to(p._type);
j[1].get_to(p.instance);
}
🤖 Prompt for AI Agents
In
presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache
around lines 35 to 38, the from_json function accesses JSON array elements
without checking if they exist, risking out-of-bounds errors. Add bounds
checking to verify the JSON array has at least two elements before accessing
j[0] and j[1], and handle the case where the array is too short, such as by
throwing an exception or returning an error.

Comment on lines +87 to +93
const auto* it = std::find_if(std::begin({{&class_name}}_enum_table), std::end({{&class_name}}_enum_table),
[e](const std::pair<{{&class_name}}, json>& ej_pair) -> bool
{
return ej_pair.first == e;
});
j = ((it != std::end({{&class_name}}_enum_table)) ? it : std::begin({{&class_name}}_enum_table))->second;
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider throwing exception for invalid enum values.

The current implementation silently falls back to the first enum value when an invalid value is encountered, which could mask serialization errors.

     void to_json(json& j, const {{&class_name}}& e)
     {
         static_assert(std::is_enum<{{&class_name}}>::value, "{{&class_name}} must be an enum!");
         const auto* it = std::find_if(std::begin({{&class_name}}_enum_table), std::end({{&class_name}}_enum_table),
                                [e](const std::pair<{{&class_name}}, json>& ej_pair) -> bool
         {
             return ej_pair.first == e;
         });
-        j = ((it != std::end({{&class_name}}_enum_table)) ? it : std::begin({{&class_name}}_enum_table))->second;
+        if (it != std::end({{&class_name}}_enum_table)) {
+            j = it->second;
+        } else {
+            throw std::invalid_argument("Invalid {{&class_name}} enum value: " + std::to_string(static_cast<int>(e)));
+        }
     }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
const auto* it = std::find_if(std::begin({{&class_name}}_enum_table), std::end({{&class_name}}_enum_table),
[e](const std::pair<{{&class_name}}, json>& ej_pair) -> bool
{
return ej_pair.first == e;
});
j = ((it != std::end({{&class_name}}_enum_table)) ? it : std::begin({{&class_name}}_enum_table))->second;
}
void to_json(json& j, const {{&class_name}}& e)
{
static_assert(std::is_enum<{{&class_name}}>::value, "{{&class_name}} must be an enum!");
const auto* it = std::find_if(
std::begin({{&class_name}}_enum_table),
std::end({{&class_name}}_enum_table),
[e](const std::pair<{{&class_name}}, json>& ej_pair) -> bool {
return ej_pair.first == e;
});
if (it != std::end({{&class_name}}_enum_table)) {
j = it->second;
} else {
throw std::invalid_argument(
"Invalid {{&class_name}} enum value: " +
std::to_string(static_cast<int>(e)));
}
}
🤖 Prompt for AI Agents
In
presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache
around lines 87 to 93, the code currently falls back to the first enum value if
an invalid enum is found, which hides errors. Modify the code to check if the
iterator equals the end of the enum table, and if so, throw an exception
indicating an invalid enum value instead of defaulting to the first element.
This will ensure serialization errors are properly surfaced.

Comment on lines +45 to +47
bool operator<(const ColumnHandle& o) const override {
return columnName < dynamic_cast<const ClpColumnHandle&>(o).columnName;
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Incomplete comparison operator may cause issues in sorted containers.

The operator< only compares columnName, ignoring other fields like originalColumnName, columnType, and nullable. This could lead to incorrect behavior when ClpColumnHandle objects are used in sorted containers or algorithms, as two handles with the same columnName but different other fields would be considered equal.

Consider implementing a complete comparison that includes all relevant fields:

   bool operator<(const ColumnHandle& o) const override {
-    return columnName < dynamic_cast<const ClpColumnHandle&>(o).columnName;
+    const auto& other = dynamic_cast<const ClpColumnHandle&>(o);
+    if (columnName != other.columnName) return columnName < other.columnName;
+    if (originalColumnName != other.originalColumnName) return originalColumnName < other.originalColumnName;
+    if (columnType != other.columnType) return columnType < other.columnType;
+    return nullable < other.nullable;
   }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
bool operator<(const ColumnHandle& o) const override {
return columnName < dynamic_cast<const ClpColumnHandle&>(o).columnName;
}
bool operator<(const ColumnHandle& o) const override {
const auto& other = dynamic_cast<const ClpColumnHandle&>(o);
if (columnName != other.columnName) return columnName < other.columnName;
if (originalColumnName != other.originalColumnName) return originalColumnName < other.originalColumnName;
if (columnType != other.columnType) return columnType < other.columnType;
return nullable < other.nullable;
}
🤖 Prompt for AI Agents
In
presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h
around lines 45 to 47, the operator< only compares columnName, ignoring other
important fields like originalColumnName, columnType, and nullable. To fix this,
update the operator< to perform a lexicographical comparison of all these fields
in a consistent order, ensuring that all relevant members are compared to
correctly distinguish different ClpColumnHandle objects in sorted containers.

@anlowee
Copy link
Author

anlowee commented Jul 11, 2025

        presto_expressions_test
        presto_protocol_test
        presto_http_filter_test
        presto_function_metadata_test
        presto_common_test
        presto_http_test
        presto_operators_test
        presto_server_test

100% tests passed, 0 tests failed out of 8

Total Test time (real) = 417.92 sec

The unit tests pass on my end.

Copy link
Member

@kirkrodrigues kirkrodrigues left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I mentioned offline, shouldn't we guard the CLP connector with an option like PRESTO_ENABLE_ARROW_FLIGHT_CONNECTOR?

Comment on lines 20 to 21
target_link_libraries(presto_connectors presto_velox_expr_conversion
velox_type_fbhive)
velox_type_fbhive velox_clp_connector)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need this? If so, let's alphabetize the list of libraries to link against.

Comment on lines 36 to 37
connectorList.emplace_back(
std::pair("clp", std::make_unique<ClpPrestoToVeloxConnector>("clp")));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To alphabetize, can we move this above line 27?

Copy link

@coderabbitai coderabbitai bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actionable comments posted: 1

♻️ Duplicate comments (4)
presto-native-execution/presto_cpp/main/tests/CMakeLists.txt (1)

48-50: Conditionally link velox_clp_connector so non-CLP builds don’t break

Same feedback as the earlier review: wrap the link line in an if(TARGET velox_clp_connector) (or a dedicated feature flag) to prevent link-time failures when the CLP connector is not built.

presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt (3)

24-31: Guard CLP linkage in presto_velox_split_test

The test unconditionally links against velox_clp_connector; add a target/option guard to keep optional builds functional.


64-70: Guard CLP linkage in presto_expressions_test

Same optional-build concern—enclose the CLP connector dependency in a conditional block.


96-102: Guard CLP linkage in presto_to_velox_connector_test

Repeat of the above: protect the link line with a feature-flag or if(TARGET …) check.

📜 Review details

Configuration used: .coderabbit.yaml
Review profile: ASSERTIVE
Plan: Pro

📥 Commits

Reviewing files that changed from the base of the PR and between 5ff4aa8 and 46155a7.

📒 Files selected for processing (3)
  • presto-native-execution/CMakeLists.txt (1 hunks)
  • presto-native-execution/presto_cpp/main/tests/CMakeLists.txt (1 hunks)
  • presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt (3 hunks)
🧰 Additional context used
🧠 Learnings (1)
📓 Common learnings
Learnt from: wraymo
PR: y-scope/presto#15
File: presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java:22-33
Timestamp: 2025-06-13T12:56:06.325Z
Learning: `ClpMetadataProvider` is instantiated only once and used solely by the Presto coordinator, so concurrency/thread-safety guarantees are unnecessary.
⏰ Context from checks skipped due to timeout of 90000ms. You can increase the timeout in your CodeRabbit configuration to a maximum of 15 minutes (900000ms). (20)
  • GitHub Check: test (17.0.13, :presto-tests -P ci-only-distributed-queries)
  • GitHub Check: test (17.0.13, :presto-tests -P ci-only-resource-manager)
  • GitHub Check: test (17.0.13, :presto-main-base)
  • GitHub Check: test (8.0.442, :presto-main-base)
  • GitHub Check: test (17.0.13, :presto-tests -P ci-only-plan-determinism)
  • GitHub Check: test (17.0.13, :presto-tests -P presto-tests-general)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-plan-determinism)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-resource-manager)
  • GitHub Check: test (8.0.442, :presto-main)
  • GitHub Check: test (17.0.13, :presto-tests -P ci-only-tpch-distributed-queries)
  • GitHub Check: test (17.0.13, :presto-tests -P ci-only-local-queries)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-distributed-non-hash-gen)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-aggregation-queries)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-distributed-queries)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-tpch-distributed-queries)
  • GitHub Check: test (8.0.442, :presto-tests -P ci-only-local-queries)
  • GitHub Check: test (8.0.442, :presto-tests -P presto-tests-general)
  • GitHub Check: maven-checks (8.0.442)
  • GitHub Check: maven-checks (17.0.13)
  • GitHub Check: prestocpp-linux-build-for-test

Comment on lines +32 to +38
# Known warnings that are benign can be disabled:
# - `restrict` since it triggers a bug in gcc 12. See
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105651
set(DISABLED_WARNINGS
"-Wno-nullability-completeness -Wno-deprecated-declarations")
"-Wno-nullability-completeness \
-Wno-deprecated-declarations \
-Wno-restrict")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Gate -Wno-restrict to GCC 12 only (avoid masking real issues under other tool-chains)

-Wno-restrict suppresses a real warning class and should be disabled only for the compiler/versions that mis-fire (GCC 12, bug 105651). Applying it unconditionally hides legitimate misuse of restrict for Clang, GCC 13+, etc.

-     -Wno-deprecated-declarations \
-     -Wno-restrict")
+     -Wno-deprecated-declarations")
+
+# Suppress -Wrestrict only for the GCC 12 series (see
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105651).  Other
+# compilers/versions still benefit from the diagnostic.
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
+    AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12
+    AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13)
+  list(APPEND DISABLED_WARNINGS "-Wno-restrict")
+endif()

This keeps the workaround while preserving the warning where it is still valuable.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# Known warnings that are benign can be disabled:
# - `restrict` since it triggers a bug in gcc 12. See
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105651
set(DISABLED_WARNINGS
"-Wno-nullability-completeness -Wno-deprecated-declarations")
"-Wno-nullability-completeness \
-Wno-deprecated-declarations \
-Wno-restrict")
# Known warnings that are benign can be disabled:
# - `restrict` since it triggers a bug in gcc 12. See
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105651
set(DISABLED_WARNINGS
"-Wno-nullability-completeness \
-Wno-deprecated-declarations")
# Suppress -Wrestrict only for the GCC 12 series (see
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105651). Other
# compilers/versions still benefit from the diagnostic.
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12
AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13)
list(APPEND DISABLED_WARNINGS "-Wno-restrict")
endif()
🤖 Prompt for AI Agents
In presto-native-execution/CMakeLists.txt around lines 32 to 38, the
-Wno-restrict warning suppression is applied unconditionally, which hides
legitimate warnings on compilers other than GCC 12. Modify the CMakeLists.txt to
apply the -Wno-restrict flag only when the compiler is GCC version 12 by adding
a conditional check for the compiler and version before including this flag in
DISABLED_WARNINGS.

@anlowee anlowee requested a review from kirkrodrigues July 13, 2025 19:10
Copy link
Member

@kirkrodrigues kirkrodrigues left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the PR title, how about:

feat: Add Prestissimo support for the CLP connector and update to the latest y-scope/velox commit.

@anlowee anlowee changed the title feat: Add CLP connector native code (copy). feat: Add Prestissimo support for the CLP connector and update to the latest y-scope/velox commit. Jul 13, 2025
@anlowee anlowee merged commit 20d8f9c into y-scope:release-0.293-clp-connector Jul 13, 2025
38 of 39 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants