Skip to content

Commit e701ef2

Browse files
authored
Merge branch 'master' into skip_gpu_reservation
2 parents 992427c + ce61412 commit e701ef2

File tree

257 files changed

+32812
-532
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

257 files changed

+32812
-532
lines changed

.github/workflows/packaging-pipeline.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ jobs:
5656
- target: x86_64-unknown-linux-musl
5757
os: ubuntu-22.04
5858
- target: x86_64-apple-darwin
59-
os: macos-13
59+
os: macos-14
6060
- target: aarch64-apple-darwin
61-
os: macos-13
61+
os: macos-14
6262
runs-on: ${{ matrix.os }}
6363
outputs:
6464
rust_binaries_artifact: rust-binaries-${{ github.sha }}
@@ -86,7 +86,7 @@ jobs:
8686
fi
8787
8888
- name: Install dependencies (macOS)
89-
if: matrix.os == 'macos-13'
89+
if: matrix.os == 'macos-14'
9090
run: |
9191
brew install protobuf
9292
@@ -108,7 +108,7 @@ jobs:
108108
cp rust/target/${{ matrix.target }}/release/openrqd release/openrqd-${{ env.BUILD_ID }}-${{ matrix.target }}
109109
110110
- name: Copy binaries (macOS)
111-
if: matrix.os == 'macos-13'
111+
if: matrix.os == 'macos-14'
112112
run: |
113113
cp rust/target/${{ matrix.target }}/release/openrqd release/openrqd-${{ env.BUILD_ID }}-${{ matrix.target }}
114114

.github/workflows/release-pipeline.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,9 @@ jobs:
8181
- target: x86_64-unknown-linux-musl
8282
os: ubuntu-22.04
8383
- target: x86_64-apple-darwin
84-
os: macos-13
84+
os: macos-14
8585
- target: aarch64-apple-darwin
86-
os: macos-13
86+
os: macos-14
8787
runs-on: ${{ matrix.os }}
8888
steps:
8989
- uses: actions/checkout@v4
@@ -121,7 +121,7 @@ jobs:
121121
fi
122122
123123
- name: Install dependencies (macOS)
124-
if: matrix.os == 'macos-13'
124+
if: matrix.os == 'macos-14'
125125
run: |
126126
brew install protobuf
127127
@@ -143,7 +143,7 @@ jobs:
143143
cp rust/target/${{ matrix.target }}/release/openrqd release/openrqd-${{ env.BUILD_ID }}-${{ matrix.target }}
144144
145145
- name: Copy binaries (macOS)
146-
if: matrix.os == 'macos-13'
146+
if: matrix.os == 'macos-14'
147147
run: |
148148
cp rust/target/${{ matrix.target }}/release/openrqd release/openrqd-${{ env.BUILD_ID }}-${{ matrix.target }}
149149

.github/workflows/testing-pipeline.yml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,3 +391,46 @@ jobs:
391391
with:
392392
name: rest-gateway-coverage
393393
path: rest_gateway/opencue_gateway/coverage.out
394+
395+
build_cueweb:
396+
name: Build CueWeb Docker Image
397+
runs-on: ubuntu-22.04
398+
steps:
399+
- name: Checkout
400+
uses: actions/checkout@v4
401+
with:
402+
fetch-depth: 0
403+
fetch-tags: true
404+
405+
- name: Mark repository as safe
406+
run: git config --global --add safe.directory $GITHUB_WORKSPACE
407+
408+
- name: Set up Docker Buildx
409+
uses: docker/setup-buildx-action@v3
410+
411+
- name: Build CueWeb Docker image
412+
working-directory: cueweb
413+
run: |
414+
docker buildx build . -f Dockerfile -t opencue/cueweb:test --load
415+
echo "CueWeb Docker image built successfully"
416+
417+
build_rest_gateway:
418+
name: Build REST Gateway Docker Image
419+
runs-on: ubuntu-22.04
420+
steps:
421+
- name: Checkout
422+
uses: actions/checkout@v4
423+
with:
424+
fetch-depth: 0
425+
fetch-tags: true
426+
427+
- name: Mark repository as safe
428+
run: git config --global --add safe.directory $GITHUB_WORKSPACE
429+
430+
- name: Set up Docker Buildx
431+
uses: docker/setup-buildx-action@v3
432+
433+
- name: Build REST Gateway Docker image
434+
run: |
435+
docker buildx build . -f rest_gateway/Dockerfile -t opencue/rest-gateway:test --load
436+
echo "REST Gateway Docker image built successfully"

.gitignore

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,20 @@ cuebot/.project
2727
/pycue/opencue/compiled_proto/
2828
/rqd/rqd/compiled_proto/
2929
docker-compose-local.yml
30+
/sandbox/kafka*
31+
/sandbox/zookeeper*
3032
docs/_site/
3133
docs/bin/
3234
sandbox/kafka-data
3335
sandbox/zookeeper-data
3436
sandbox/zookeeper-logs
37+
sandbox/rqd/shots/
3538
docs/_data/version.yml
39+
target/*
40+
41+
# Agent files
42+
AGENTS.md
43+
CLAUDE.md
44+
45+
# Documentation navigation temporary file
46+
docs/nav_order_index.txt

VERSION.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.14
1+
1.15

cuebot/build.gradle

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ repositories {
2626
def grpcVersion = '1.47.0'
2727
def protobufVersion = '3.21.2'
2828
def activemqVersion = '5.12.0'
29+
def kafkaVersion = '3.4.0'
30+
def elasticsearchVersion = '8.8.0'
2931

3032
// Spring dependency versions are managed by the io.spring.dependency-management plugin.
3133
// Appropriate versions will be pulled based on the spring boot version specified in the
@@ -52,6 +54,15 @@ dependencies {
5254
implementation group: 'io.prometheus', name: 'simpleclient', version: '0.16.0'
5355
implementation group: 'io.prometheus', name: 'simpleclient_servlet', version: '0.16.0'
5456

57+
// Kafka for event publishing
58+
implementation group: 'org.apache.kafka', name: 'kafka-clients', version: "${kafkaVersion}"
59+
implementation group: 'org.springframework.kafka', name: 'spring-kafka', version: '2.9.0'
60+
61+
// Elasticsearch for historical data storage
62+
implementation group: 'co.elastic.clients', name: 'elasticsearch-java', version: "${elasticsearchVersion}"
63+
implementation group: 'org.elasticsearch.client', name: 'elasticsearch-rest-client', version: "${elasticsearchVersion}"
64+
implementation group: 'jakarta.json', name: 'jakarta.json-api', version: '2.1.1'
65+
5566
protobuf files("../proto/src/")
5667

5768
testImplementation group: 'junit', name: 'junit', version: '4.12'
@@ -67,12 +78,14 @@ dependencies {
6778

6879
compileJava {
6980
dependsOn generateProto
70-
options.compilerArgs << "-Xlint:all" << "-Werror"
81+
// Disable the "serial" lint category: protobuf-generated code triggers it, and -Werror would turn those warnings into build failures
82+
options.compilerArgs << "-Xlint:all,-serial" << "-Werror"
7183
}
7284

7385
compileTestJava {
7486
dependsOn generateProto
75-
options.compilerArgs << "-Xlint:all" << "-Werror"
87+
// Disable the "serial" lint category: protobuf-generated code triggers it, and -Werror would turn those warnings into build failures
88+
options.compilerArgs << "-Xlint:all,-serial" << "-Werror"
7689
}
7790

7891
protobuf {

cuebot/src/main/java/com/imageworks/spcue/CuebotApplication.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@
2424
import org.apache.logging.log4j.LogManager;
2525
import org.springframework.boot.SpringApplication;
2626
import org.springframework.boot.autoconfigure.SpringBootApplication;
27+
import org.springframework.boot.autoconfigure.kafka.KafkaAutoConfiguration;
2728

28-
@SpringBootApplication
29+
@SpringBootApplication(exclude = {KafkaAutoConfiguration.class})
2930
public class CuebotApplication extends SpringApplication {
3031
private static String[] checkArgs(String[] args) {
3132
Optional<String> deprecatedFlag = Arrays.stream(args)

cuebot/src/main/java/com/imageworks/spcue/ExecutionSummary.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,15 @@ public class ExecutionSummary {
2828
public long gpuTimeSuccess;
2929
public long gpuTimeFail;
3030
public long highMemoryKb;
31+
public int highFrameSec;
32+
33+
public int getHighFrameSec() {
34+
return highFrameSec;
35+
}
36+
37+
public void setHighFrameSec(int highFrameSec) {
38+
this.highFrameSec = highFrameSec;
39+
}
3140

3241
public long getHighMemoryKb() {
3342
return highMemoryKb;

cuebot/src/main/java/com/imageworks/spcue/PrometheusMetricsCollector.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,38 @@ public class PrometheusMetricsCollector {
119119
.labelNames("env", "cuebot_host", "render_node", "job_name", "frame_name", "frame_id")
120120
.register();
121121

122+
private static final Counter frameCompletedCounter = Counter.build()
123+
.name("cue_frames_completed_total").help("Total number of frames completed")
124+
.labelNames("env", "cuebot_host", "state", "show", "shot").register();
125+
126+
private static final Counter jobCompletedCounter =
127+
Counter.build().name("cue_jobs_completed_total").help("Total number of jobs completed")
128+
.labelNames("env", "cuebot_host", "state", "show", "shot").register();
129+
130+
private static final Histogram jobCoreSecondsHistogram = Histogram.build()
131+
.name("cue_job_core_seconds").help("Histogram of total core seconds per job")
132+
.labelNames("env", "cuebot_host", "show", "shot")
133+
.buckets(3600, 36000, 360000, 3600000, 36000000).register();
134+
135+
private static final Histogram layerMaxRuntimeHistogram =
136+
Histogram.build().name("cue_layer_max_runtime_seconds")
137+
.help("Histogram of max frame runtime per layer in seconds")
138+
.labelNames("env", "cuebot_host", "show", "shot", "layer_type")
139+
.buckets(60, 300, 600, 1800, 3600, 7200, 14400, 28800, 86400).register();
140+
141+
private static final Histogram layerMaxMemoryHistogram =
142+
Histogram.build().name("cue_layer_max_memory_bytes")
143+
.help("Histogram of max frame memory usage per layer in bytes")
144+
.labelNames("env", "cuebot_host", "show", "shot", "layer_type")
145+
.buckets(256L * 1024 * 1024, 512L * 1024 * 1024, 1024L * 1024 * 1024,
146+
2048L * 1024 * 1024, 4096L * 1024 * 1024, 8192L * 1024 * 1024,
147+
16384L * 1024 * 1024, 32768L * 1024 * 1024)
148+
.register();
149+
150+
private static final Counter hostReportsReceivedCounter = Counter.build()
151+
.name("cue_host_reports_received_total").help("Total number of host reports received")
152+
.labelNames("env", "cuebot_host", "facility").register();
153+
122154
private String deployment_environment;
123155
private String cuebot_host;
124156

@@ -269,6 +301,82 @@ public void incrementFrameKillFailureCounter(String hostname, String jobName, St
269301
jobName, frameName, frameId).inc();
270302
}
271303

304+
/**
305+
* Record a frame completion
306+
*
307+
* @param state final state of the frame
308+
* @param show show name
309+
* @param shot shot name
310+
*/
311+
public void recordFrameCompleted(String state, String show, String shot) {
312+
frameCompletedCounter
313+
.labels(this.deployment_environment, this.cuebot_host, state, show, shot).inc();
314+
}
315+
316+
/**
317+
* Record a job completion
318+
*
319+
* @param state final state of the job
320+
* @param show show name
321+
* @param shot shot name
322+
*/
323+
public void recordJobCompleted(String state, String show, String shot) {
324+
jobCompletedCounter.labels(this.deployment_environment, this.cuebot_host, state, show, shot)
325+
.inc();
326+
}
327+
328+
/**
329+
* Record job total core seconds for histogramming
330+
*
331+
* @param coreSeconds total core seconds consumed by the job
332+
* @param show show name
333+
* @param shot shot name
334+
*/
335+
public void recordJobCoreSeconds(double coreSeconds, String show, String shot) {
336+
jobCoreSecondsHistogram.labels(this.deployment_environment, this.cuebot_host, show, shot)
337+
.observe(coreSeconds);
338+
}
339+
340+
/**
341+
* Record layer max runtime for histogramming
342+
*
343+
* @param runtimeSeconds max runtime in seconds for the layer
344+
* @param show show name
345+
* @param shot shot name
346+
* @param layerType layer type
347+
*/
348+
public void recordLayerMaxRuntime(double runtimeSeconds, String show, String shot,
349+
String layerType) {
350+
layerMaxRuntimeHistogram
351+
.labels(this.deployment_environment, this.cuebot_host, show, shot, layerType)
352+
.observe(runtimeSeconds);
353+
}
354+
355+
/**
356+
* Record layer max memory usage for histogramming
357+
*
358+
* @param memoryBytes max memory in bytes for the layer
359+
* @param show show name
360+
* @param shot shot name
361+
* @param layerType layer type
362+
*/
363+
public void recordLayerMaxMemory(double memoryBytes, String show, String shot,
364+
String layerType) {
365+
layerMaxMemoryHistogram
366+
.labels(this.deployment_environment, this.cuebot_host, show, shot, layerType)
367+
.observe(memoryBytes);
368+
}
369+
370+
/**
371+
* Record a host report received
372+
*
373+
* @param facility facility name
374+
*/
375+
public void recordHostReport(String facility) {
376+
hostReportsReceivedCounter.labels(this.deployment_environment, this.cuebot_host, facility)
377+
.inc();
378+
}
379+
272380
// Setters used for dependency injection
273381
public void setBookingQueue(BookingQueue bookingQueue) {
274382
this.bookingQueue = bookingQueue;

cuebot/src/main/java/com/imageworks/spcue/config/AppConfig.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@
3838
"classpath:conf/spring/applicationContext-grpcServer.xml",
3939
"classpath:conf/spring/applicationContext-service.xml",
4040
"classpath:conf/spring/applicationContext-jms.xml",
41-
"classpath:conf/spring/applicationContext-criteria.xml"})
41+
"classpath:conf/spring/applicationContext-criteria.xml",
42+
"classpath:conf/spring/applicationContext-monitoring.xml"})
4243
@EnableConfigurationProperties
4344
@PropertySource({"classpath:opencue.properties"})
4445
public class AppConfig {

0 commit comments

Comments
 (0)