Skip to content

Commit 18140ac

Browse files
authored
Feature - Compute API (#8)
* Basic Impl for Compute Endpoints * Compute Initialize * Auto Versioning for Tags and Releases * Brief Docs and Compute Naming * Retry Logic for Http Client and Compute Unit Tests * Update ReadMe & Github Workflows
1 parent 9db8351 commit 18140ac

File tree

11 files changed

+1006
-26
lines changed

11 files changed

+1006
-26
lines changed

.github/workflows/release.yml

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
name: Auto Release

on:
  push:
    branches:
      - main

# Two pushes landing close together must not both bump + tag; serialize runs.
concurrency:
  group: auto-release
  cancel-in-progress: false

jobs:
  release:
    runs-on: ubuntu-latest
    permissions:
      contents: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # full history so `git describe` can see existing tags
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Get current version and calculate next version
        id: version
        run: |
          # Current version = first "VERSION x.y.z" occurrence in CMakeLists.txt
          CURRENT_VERSION=$(grep -oP 'VERSION \K[0-9]+\.[0-9]+\.[0-9]+' CMakeLists.txt | head -1)
          echo "Current version in CMakeLists.txt: $CURRENT_VERSION"

          # Latest tag; fall back to v0.0.0 when the repo has no tags yet
          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
          LATEST_VERSION=${LATEST_TAG#v}
          echo "Latest git tag: $LATEST_TAG ($LATEST_VERSION)"

          # Split MAJOR.MINOR.PATCH and auto-increment the patch component
          IFS='.' read -r -a VERSION_PARTS <<< "$CURRENT_VERSION"
          MAJOR="${VERSION_PARTS[0]}"
          MINOR="${VERSION_PARTS[1]}"
          PATCH="${VERSION_PARTS[2]}"
          PATCH=$((PATCH + 1))
          NEW_VERSION="$MAJOR.$MINOR.$PATCH"

          echo "New version: $NEW_VERSION"
          echo "version=$NEW_VERSION" >> "$GITHUB_OUTPUT"
          echo "tag=v$NEW_VERSION" >> "$GITHUB_OUTPUT"
          echo "prev_tag=$LATEST_TAG" >> "$GITHUB_OUTPUT"

      - name: Update version in all files
        run: |
          VERSION="${{ steps.version.outputs.version }}"
          echo "Updating version to $VERSION in all files..."

          # Update CMakeLists.txt
          sed -i "s/VERSION [0-9]\+\.[0-9]\+\.[0-9]\+/VERSION $VERSION/" CMakeLists.txt

          # Update Doxyfile
          sed -i "s/PROJECT_NUMBER[[:space:]]*=[[:space:]]*\"[0-9]\+\.[0-9]\+\.[0-9]\+\"/PROJECT_NUMBER = \"$VERSION\"/" Doxyfile

          # Update vcpkg.json
          sed -i "s/\"version\":[[:space:]]*\"[0-9]\+\.[0-9]\+\.[0-9]\+\"/\"version\": \"$VERSION\"/" vcpkg.json

          # Update packaging/vcpkg-port/vcpkg.json
          sed -i "s/\"version\":[[:space:]]*\"[0-9]\+\.[0-9]\+\.[0-9]\+\"/\"version\": \"$VERSION\"/" packaging/vcpkg-port/vcpkg.json

          # Update include/databricks/version.h
          sed -i "s/constexpr const char \*VERSION = \"[0-9]\+\.[0-9]\+\.[0-9]\+\"/constexpr const char *VERSION = \"$VERSION\"/" include/databricks/version.h

          # Update README.md - Latest Release line
          sed -i "s/\*\*Latest Release\*\*:[[:space:]]*\[v[0-9]\+\.[0-9]\+\.[0-9]\+\]/**Latest Release**: [v$VERSION]/" README.md
          sed -i "s|/releases/tag/v[0-9]\+\.[0-9]\+\.[0-9]\+|/releases/tag/v$VERSION|" README.md

          echo "Version updated to $VERSION in all files"

      - name: Generate release notes
        id: notes
        run: |
          PREV_TAG="${{ steps.version.outputs.prev_tag }}"

          if [ "$PREV_TAG" = "v0.0.0" ]; then
            # First release - get all commits
            COMMITS=$(git log --pretty=format:"- %s (%h)" --reverse)
          else
            # Get commits since last tag
            COMMITS=$(git log "${PREV_TAG}..HEAD" --pretty=format:"- %s (%h)" --reverse)
          fi

          # Create release notes
          cat > release_notes.md <<EOF
          ## What's Changed

          $COMMITS

          ---

          **Full Changelog**: https://github.com/${{ github.repository }}/compare/${PREV_TAG}...v${{ steps.version.outputs.version }}
          EOF

          echo "Generated release notes:"
          cat release_notes.md

      - name: Commit version bump
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add CMakeLists.txt Doxyfile vcpkg.json packaging/vcpkg-port/vcpkg.json include/databricks/version.h README.md
          # `git commit` exits non-zero when nothing is staged (e.g. every sed
          # was a no-op because the version was already current) — don't fail
          # the whole run in that case.
          git diff --cached --quiet || git commit -m "chore: bump version to ${{ steps.version.outputs.version }}"
          git push origin main

      - name: Create and push tag
        run: |
          git tag -a "v${{ steps.version.outputs.version }}" -m "Release v${{ steps.version.outputs.version }}"
          git push origin "v${{ steps.version.outputs.version }}"

      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: v${{ steps.version.outputs.version }}
          name: Release v${{ steps.version.outputs.version }}
          body_path: release_notes.md
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ set(SOURCES
9494
src/core/client.cpp
9595
src/core/config.cpp
9696
src/jobs/jobs.cpp
97+
src/compute/compute_types.cpp
98+
src/compute/compute.cpp
9799
src/connection_pool.cpp
98100
src/internal/pool_manager.cpp
99101
src/internal/logger.cpp
@@ -106,6 +108,8 @@ set(HEADERS
106108
include/databricks/connection_pool.h
107109
include/databricks/version.h
108110
include/databricks/jobs/jobs.h
111+
include/databricks/compute/compute.h
112+
include/databricks/compute/compute_types.h
109113
)
110114

111115
# Internal headers (not installed)

README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,43 @@ int main() {
631631

632632
For a complete example, see `examples/basic/jobs_example.cpp`.
633633

634+
### Compute/Clusters API
635+
636+
Manage Databricks compute clusters programmatically:
637+
638+
```cpp
639+
#include <databricks/compute/compute.h>
640+
#include <databricks/core/config.h>
#include <iostream>
641+
642+
int main() {
643+
databricks::AuthConfig auth = databricks::AuthConfig::from_environment();
644+
databricks::Compute compute(auth);
645+
646+
// List clusters
647+
auto clusters = compute.list_compute();
648+
for (const auto& c : clusters) {
649+
std::cout << c.cluster_name << " [" << c.state << "]" << std::endl;
650+
}
651+
652+
// Lifecycle management
653+
compute.start_compute("cluster-id");
654+
compute.restart_compute("cluster-id");
655+
compute.terminate_compute("cluster-id");
656+
657+
return 0;
658+
}
659+
```
660+
661+
**Features:**
662+
- List/get cluster details
663+
- Start, restart, and terminate clusters
664+
- Cluster state tracking (PENDING, RUNNING, TERMINATED, etc.)
665+
- Automatic HTTP retry logic with exponential backoff
666+
667+
**HTTP Retry Logic:**
668+
669+
All REST API calls automatically retry on transient failures (408, 429, 500-504) with exponential backoff (1s, 2s, 4s). This is built into the HTTP client and requires no configuration.
670+
634671
### Direct ConnectionPool Management
635672

636673
For advanced users who need fine-grained control over connection pools:
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
#pragma once

#include "databricks/core/config.h"
#include "databricks/compute/compute_types.h"

#include <memory>
#include <string>
#include <vector>

namespace databricks {
/**
 * @brief Client for the Databricks Clusters/Compute API (Clusters API 2.0).
 *
 * Provides listing, inspection, and lifecycle control (start / restart /
 * terminate) of compute clusters in a Databricks workspace. The class is
 * move-only in spirit: copying is disabled because it owns an opaque
 * implementation (pimpl) holding the HTTP client state.
 *
 * Example usage:
 * @code
 * databricks::AuthConfig auth = databricks::AuthConfig::from_environment();
 * databricks::Compute compute(auth);
 *
 * // List all compute clusters
 * auto cluster_list = compute.list_compute();
 *
 * // Get specific compute cluster details
 * auto cluster = compute.get_compute("1234-567890-abcde123");
 *
 * // Start a terminated compute cluster
 * compute.start_compute("1234-567890-abcde123");
 * @endcode
 */
class Compute {
  public:
    /**
     * @brief Construct a Compute API client.
     * @param auth Authentication configuration carrying workspace host and token
     */
    explicit Compute(const AuthConfig& auth);

    /// Destructor (out-of-line: required for the unique_ptr-to-incomplete Impl).
    ~Compute();

    // Non-copyable: the pimpl owns connection/client state.
    Compute(const Compute&) = delete;
    Compute& operator=(const Compute&) = delete;

    /**
     * @brief List all compute clusters in the workspace.
     * @return Vector of Cluster objects
     * @throws std::runtime_error if the API request fails
     */
    std::vector<Cluster> list_compute();

    /**
     * @brief Fetch full details for one compute cluster.
     * @param cluster_id The unique identifier of the cluster
     * @return Cluster object with full details
     * @throws std::runtime_error if the cluster is not found or the API request fails
     */
    Cluster get_compute(const std::string& cluster_id);

    /**
     * @brief Start a terminated compute cluster.
     * @param cluster_id The unique identifier of the cluster to start
     * @return true if the operation was successful
     * @throws std::runtime_error if the API request fails
     */
    bool start_compute(const std::string& cluster_id);

    /**
     * @brief Terminate a running compute cluster.
     * @param cluster_id The unique identifier of the cluster to terminate
     * @return true if the operation was successful
     * @throws std::runtime_error if the API request fails
     *
     * @note Stops the cluster without permanently deleting it; a terminated
     *       cluster can be started again later.
     */
    bool terminate_compute(const std::string& cluster_id);

    /**
     * @brief Restart a compute cluster.
     * @param cluster_id The unique identifier of the cluster to restart
     * @return true if the operation was successful
     * @throws std::runtime_error if the API request fails
     *
     * @note Terminates and then starts the cluster with the same configuration.
     */
    bool restart_compute(const std::string& cluster_id);

  private:
    class Impl;                   // hides HTTP client details from this header
    std::unique_ptr<Impl> pimpl_; // owning pointer to the implementation

    // Shared driver for the start/terminate/restart POST endpoints;
    // operation_name is used for error reporting.
    bool compute_operation(const std::string& cluster_id, const std::string& endpoint, const std::string& operation_name);
    // JSON -> model helpers for list and single-cluster responses.
    static std::vector<Cluster> parse_compute_list(const std::string& json_str);
    static Cluster parse_compute(const std::string& json_str);
};
} // namespace databricks
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
#pragma once

#include <cstdint>
#include <map>
#include <string>

namespace databricks {

/**
 * @brief Lifecycle states a Databricks cluster can be in.
 *
 * NOTE(review): the enumerator `ERROR` may collide with the `ERROR` macro
 * from <windows.h> on Windows builds — confirm against supported platforms.
 */
enum class ClusterStateEnum {
    PENDING,     ///< Cluster is being created
    RUNNING,     ///< Cluster is running and ready for use
    RESTARTING,  ///< Cluster is restarting
    RESIZING,    ///< Cluster is being resized
    TERMINATING, ///< Cluster is being terminated
    TERMINATED,  ///< Cluster has been terminated
    ERROR,       ///< Cluster is in an error state
    UNKNOWN      ///< Unknown or unrecognized state
};

/**
 * @brief Parse a cluster state string into ClusterStateEnum.
 * @param state_str String representation of the cluster state
 * @return ClusterStateEnum corresponding to the string
 */
ClusterStateEnum parse_cluster_state(const std::string& state_str);

/**
 * @brief Convert ClusterStateEnum to its string representation.
 * @param state ClusterStateEnum value
 * @return String representation of the state
 */
std::string cluster_state_to_string(ClusterStateEnum state);

/**
 * @brief Core metadata of a Databricks cluster.
 *
 * Clusters are compute resources used to run notebooks, jobs, and other
 * workloads; this struct mirrors the fields returned by the Clusters API.
 */
struct Cluster {
    std::string cluster_id;        ///< Unique identifier for the cluster
    std::string cluster_name;      ///< Display name of the cluster
    std::string state;             ///< Current lifecycle state (e.g., "RUNNING", "TERMINATED")
    std::string creator_user_name; ///< Username of the cluster creator
    uint64_t start_time = 0;       ///< Unix timestamp in milliseconds when cluster started
    uint64_t terminated_time = 0;  ///< Unix timestamp in milliseconds when cluster terminated (0 if running)
    std::string spark_version;     ///< Spark runtime version (e.g., "11.3.x-scala2.12")
    std::string node_type_id;      ///< Cloud provider instance type (e.g., "i3.xlarge")
    int num_workers = 0;           ///< Number of worker nodes in the cluster
    std::map<std::string, std::string> custom_tags; ///< User-defined tags for organization and tracking
};

/**
 * @brief Fine-grained state information for a cluster.
 *
 * Carries the enumerated state plus a human-readable message that can help
 * diagnose issues or understand why a cluster is in its current state.
 */
struct ClusterState {
    std::string cluster_id; ///< Unique identifier for the cluster
    ClusterStateEnum cluster_state = ClusterStateEnum::UNKNOWN; ///< Enumerated state value (default: UNKNOWN)
    std::string state_message; ///< Human-readable message describing the state

    /**
     * @brief Parse a ClusterState from a JSON string.
     * @param json_str JSON representation of cluster state
     * @return Parsed ClusterState object
     * @throws std::runtime_error if parsing fails
     */
    static ClusterState from_json(const std::string& json_str);
};

} // namespace databricks

0 commit comments

Comments
 (0)