Merged
66 changes: 66 additions & 0 deletions .github/workflows/automated-experiment-result-checker.yml
@@ -0,0 +1,66 @@
---
name: "Automated Experiment Result Checker"

# yamllint disable-line rule:truthy
on:
  pull_request:
    types: [opened, reopened, synchronize]

concurrency:
  group: ${{ github.ref }}-automated-experiment-result-checker
  cancel-in-progress: true

permissions:
  contents: write
  pull-requests: write
jobs:
  automated-experiment-result-checker:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 0

      - name: Check for updated experiment result graphs
        run: |
          set -e
          cd "$(git rev-parse --show-toplevel)"

          # TODO: Include lower bound windup experiment once we have a way to make it run in a reasonable time.
          # Find all PNGs, excluding those with "windup" in their filename
          mapfile -t all_pngs < <(find experiments/results/main_graphs experiments/results/throughput_graphs experiments/results/duration_graphs -type f -name '*.png' ! -name '*windup*.png' | sort)

          # Find all changed PNGs in the latest commit
          mapfile -t changed_pngs < <(git diff --name-only --diff-filter=AM HEAD~1..HEAD | grep -E '^experiments/results/(main_graphs|throughput_graphs|duration_graphs)/.*\.png$' | grep -v windup | sort)

          # Report any PNGs that are not updated in the latest commit
          declare -a not_updated=()
          for file in "${all_pngs[@]}"; do
            if ! printf "%s\n" "${changed_pngs[@]}" | grep -qx "$file"; then
              not_updated+=("$file")
            fi
          done

          if [ ${#not_updated[@]} -gt 0 ]; then
            echo "❌ The following result graph PNG files have NOT been updated in the latest commit:"
            for f in "${not_updated[@]}"; do
              echo " - $f"
            done
            echo ""
            echo "Every commit must update all non-windup experiment result graphs. You may be missing updates."
            echo "Run:"
            echo ""
            echo " cd experiments"
            echo " bundle install"
            echo " bundle exec ruby run_all_experiments.rb"
            echo ""
            echo "Commit the updated graphs to resolve this check."
            exit 1
          fi

          echo "✅ All non-windup experiment result graphs are up to date for this commit!"



Binary file removed experiments/example_output.png
93 changes: 0 additions & 93 deletions experiments/example_with_circuit_breaker.rb

This file was deleted.

52 changes: 31 additions & 21 deletions experiments/test_helpers.rb → experiments/experiment_helpers.rb
@@ -2,8 +2,9 @@

module Semian
module Experiments
# Test runner for circuit breaker experiments (both adaptive and classic)
# Experiment runner for circuit breaker experiments (both adaptive and classic)
# Handles all the common logic: service creation, threading, monitoring, analysis, and visualization
require "fileutils"
class DegradationPhase
attr_reader :healthy, :error_rate, :latency

@@ -14,11 +15,11 @@ def initialize(healthy: nil, error_rate: nil, latency: nil)
end
end

class CircuitBreakerTestRunner
attr_reader :test_name, :resource_name, :degradation_phases, :phase_duration, :graph_title, :graph_filename, :service_count, :target_service
class CircuitBreakerExperimentRunner
attr_reader :experiment_name, :resource_name, :degradation_phases, :phase_duration, :graph_title, :graph_filename, :service_count, :target_service

def initialize(
test_name:,
experiment_name:,
resource_name:,
degradation_phases:,
phase_duration:,
@@ -32,18 +33,24 @@ def initialize(
graph_bucket_size: nil,
base_error_rate: nil
)
@test_name = test_name
@experiment_name = experiment_name
@resource_name = resource_name
@degradation_phases = degradation_phases
@phase_duration = phase_duration
@graph_title = graph_title
@semian_config = semian_config
@is_adaptive = semian_config[:adaptive_circuit_breaker] == true
@graph_filename = graph_filename || "#{resource_name}.png"
@main_results_path = File.join(File.dirname(__FILE__), "results/main_graphs")
@duration_results_path = File.join(File.dirname(__FILE__), "results/duration_graphs")
@throughput_results_path = File.join(File.dirname(__FILE__), "results/throughput_graphs")
FileUtils.mkdir_p(@main_results_path) unless File.directory?(@main_results_path)
FileUtils.mkdir_p(@duration_results_path) unless File.directory?(@duration_results_path)
FileUtils.mkdir_p(@throughput_results_path) unless File.directory?(@throughput_results_path)
@num_threads = num_threads
@requests_per_second_per_thread = requests_per_second_per_thread
@x_axis_label_interval = x_axis_label_interval || phase_duration
@test_duration = degradation_phases.length * phase_duration
@experiment_duration = degradation_phases.length * phase_duration
@service_count = service_count
@target_service = nil
@graph_bucket_size = graph_bucket_size || (@is_adaptive ? 10 : 1)
@@ -232,12 +239,12 @@ def subscribe_to_state_changes
end

def execute_phases
puts "\n=== #{@test_name} (ADAPTIVE) ==="
puts "\n=== #{@experiment_name} (ADAPTIVE) ==="
puts "Error rate: #{@degradation_phases.map { |r| r.error_rate ? "#{(r.error_rate * 100).round(1)}%" : "N/A" }.join(" -> ")}"
puts "Latency: #{@degradation_phases.map { |r| r.latency ? "#{(r.latency * 1000).round(1)}ms" : "N/A" }.join(" -> ")}"
puts "Phase duration: #{@phase_duration} seconds (#{(@phase_duration / 60.0).round(1)} minutes) per phase"
puts "Duration: #{@test_duration} seconds (#{(@test_duration / 60.0).round(1)} minutes)"
puts "Starting test...\n"
puts "Duration: #{@experiment_duration} seconds (#{(@experiment_duration / 60.0).round(1)} minutes)"
puts "Starting experiment...\n"

@start_time = Time.now

@@ -281,7 +288,7 @@ def wait_for_completion
end

def generate_analysis
puts "\n\n=== Test Complete ==="
puts "\n\n=== Experiment Complete ==="
puts "Actual duration: #{(@end_time - @start_time).round(2)} seconds"
puts "\nGenerating analysis..."

@@ -306,7 +313,7 @@ def display_summary_statistics

def display_time_based_analysis
bucket_size = @phase_duration
num_buckets = (@test_duration / bucket_size.to_f).ceil
num_buckets = (@experiment_duration / bucket_size.to_f).ceil

puts "\n=== Time-Based Analysis (#{bucket_size}-second buckets) ==="
(0...num_buckets).each do |bucket_idx|
@@ -351,7 +358,7 @@ def display_thread_timing_statistics
avg_utilization = (avg_thread_time / total_wall_time * 100)

puts "Total threads: #{@thread_timings.size}"
puts "Test wall clock duration: #{total_wall_time.round(2)}s"
puts "Experiment wall clock duration: #{total_wall_time.round(2)}s"
puts "\nTime spent making requests per thread:"
puts " Min: #{min_thread_time.round(2)}s"
puts " Max: #{max_thread_time.round(2)}s"
@@ -450,7 +457,7 @@ def generate_visualization

# Aggregate data into buckets for detailed visualization
bucket_size = @graph_bucket_size
num_buckets = (@test_duration / bucket_size.to_f).ceil
num_buckets = (@experiment_duration / bucket_size.to_f).ceil

bucketed_data = []
(0...num_buckets).each do |bucket_idx|
@@ -503,8 +510,9 @@ def generate_visualization
add_state_transition_markers(graph, bucketed_data, bucket_size, num_buckets)
end

graph.write(@graph_filename)
puts "Graph saved to #{@graph_filename}"
main_graph_path = File.join(@main_results_path, @graph_filename)
graph.write(main_graph_path)
puts "Graph saved to #{main_graph_path}"

# Generate duration graph
duration_graph = Gruff::Line.new(1400)
@@ -518,8 +526,9 @@
duration_graph.data("Total Request Duration", bucketed_data.map { |d| d[:sum_request_duration] })

duration_filename = @graph_filename.sub(%r{([^/]+)$}, 'duration-\1')
duration_graph.write(duration_filename)
puts "Duration graph saved to #{duration_filename}"
duration_graph_path = File.join(@duration_results_path, duration_filename)
duration_graph.write(duration_graph_path)
puts "Duration graph saved to #{duration_graph_path}"

# Generate throughput graph
throughput_graph = Gruff::Line.new(1400)
@@ -533,18 +542,19 @@
throughput_graph.data("Total Request Throughput", bucketed_data.map { |d| d[:throughput] })

throughput_filename = @graph_filename.sub(%r{([^/]+)$}, 'throughput-\1')
throughput_graph.write(throughput_filename)
puts "Throughput graph saved to #{throughput_filename}"
throughput_graph_path = File.join(@throughput_results_path, throughput_filename)
throughput_graph.write(throughput_graph_path)
puts "Throughput graph saved to #{throughput_graph_path}"
end

def add_state_transition_markers(graph, bucketed_data, bucket_size, num_buckets)
return if @state_transitions.empty?

test_start = @outcomes.keys[0]
experiment_start = @outcomes.keys[0]

@state_transitions.each_with_index do |transition, idx|
# Calculate which bucket this transition falls into
elapsed = transition[:timestamp] - test_start
elapsed = transition[:timestamp] - experiment_start
bucket_idx = (elapsed / bucket_size).to_i

next if bucket_idx < 0 || bucket_idx >= num_buckets
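
As a point of reference for the workflow added above, the path handling introduced in this helper determines where the checked PNGs end up. The short sketch below is an illustration only, not an excerpt from the PR; paths are relative to experiments/, and it reuses the sudden_error_spike_100.png filename from the experiment scripts further down to show how the three output paths are derived from graph_filename.

# Illustration of the output-path logic added above (not code from the PR itself).
graph_filename = "sudden_error_spike_100.png"

main_graph_path = File.join("results/main_graphs", graph_filename)
# => "results/main_graphs/sudden_error_spike_100.png"

duration_filename = graph_filename.sub(%r{([^/]+)$}, 'duration-\1')
duration_graph_path = File.join("results/duration_graphs", duration_filename)
# => "results/duration_graphs/duration-sudden_error_spike_100.png"

throughput_filename = graph_filename.sub(%r{([^/]+)$}, 'throughput-\1')
throughput_graph_path = File.join("results/throughput_graphs", throughput_filename)
# => "results/throughput_graphs/throughput-sudden_error_spike_100.png"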
@@ -1,15 +1,15 @@
# frozen_string_literal: true

$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
$LOAD_PATH.unshift(File.expand_path("../../lib", __dir__))

require "semian"
require_relative "mock_service"
require_relative "experimental_resource"
require_relative "test_helpers"
require_relative "../mock_service"
require_relative "../experimental_resource"
require_relative "../experiment_helpers"

# Sudden error spike test: 1% -> 100% -> 1%
runner = Semian::Experiments::CircuitBreakerTestRunner.new(
test_name: "Sudden Error Spike Test (Classic) - 100% for 20 seconds",
# Sudden error spike experiment: 1% -> 100% -> 1%
runner = Semian::Experiments::CircuitBreakerExperimentRunner.new(
experiment_name: "Sudden Error Spike Experiment (Classic) - 100% for 20 seconds",
resource_name: "protected_service_sudden_error_spike_100",
degradation_phases: [Semian::Experiments::DegradationPhase.new(healthy: true)] * 3 +
[Semian::Experiments::DegradationPhase.new(error_rate: 1.00)] +
@@ -22,7 +22,7 @@
error_timeout: 15,
bulkhead: false,
},
graph_title: "Sudden Error Spike Test (Classic) - 100% for 20 seconds",
graph_title: "Sudden Error Spike Experiment (Classic) - 100% for 20 seconds",
graph_filename: "sudden_error_spike_100.png",
x_axis_label_interval: 60,
)
@@ -1,15 +1,15 @@
# frozen_string_literal: true

$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
$LOAD_PATH.unshift(File.expand_path("../../lib", __dir__))

require "semian"
require_relative "mock_service"
require_relative "experimental_resource"
require_relative "test_helpers"
require_relative "../mock_service"
require_relative "../experimental_resource"
require_relative "../experiment_helpers"

# Sudden error spike test: 1% -> 100% -> 1%
runner = Semian::Experiments::CircuitBreakerTestRunner.new(
test_name: "Sudden Error Spike Test (Adaptive) - 100% for 20 seconds",
# Sudden error spike experiment: 1% -> 100% -> 1%
runner = Semian::Experiments::CircuitBreakerExperimentRunner.new(
experiment_name: "Sudden Error Spike Experiment (Adaptive) - 100% for 20 seconds",
resource_name: "protected_service_sudden_error_spike_100_adaptive",
degradation_phases: [Semian::Experiments::DegradationPhase.new(healthy: true)] * 3 +
[Semian::Experiments::DegradationPhase.new(error_rate: 1.00)] +
@@ -19,7 +19,7 @@
adaptive_circuit_breaker: true,
bulkhead: false,
},
graph_title: "Sudden Error Spike Test (Adaptive) - 100% for 20 seconds",
graph_title: "Sudden Error Spike Experiment (Adaptive) - 100% for 20 seconds",
graph_filename: "sudden_error_spike_100_adaptive.png",
x_axis_label_interval: 60,
)