|
# OpenTelemetry tracing helpers for annotating spans.
#
# Every public method is best-effort: telemetry failures are reported to
# Sentry (when Sentry is loaded) and never raised to the caller. Errors raised
# by the caller's own code inside `trace` are NOT telemetry failures and are
# always propagated unchanged.
#
# Span attributes are used rather than span events because X-Ray does not
# support events, see:
# https://github.com/aws-observability/aws-otel-collector/issues/821
class TelemetryService
  # Adds request-level attributes (e.g. form/session context) to the current span.
  #
  # @param attrs [Hash] attribute name => value; nil values are dropped, keys
  #   are converted to strings and values are coerced to primitives
  # @return [void]
  def self.set_request_attributes(attrs)
    return unless defined?(OpenTelemetry)

    current_span.add_attributes(sanitize_attributes(attrs))
  rescue StandardError => e
    report_error(e)
  end

  # Adds question-level attributes for a page request to the current span.
  #
  # @param step [Object] must respond to question, page_id, question_text,
  #   page, page_number and repeatable?
  # @param form [Object] must respond to submission_type
  # @return [void]
  def self.set_question_attributes(step, form)
    return unless defined?(OpenTelemetry)

    attrs = {
      "question.type" => step.question.class.name,
      "question.id" => step.page_id,
      "question.text" => step.question_text,
      "question.answer_type" => step.page&.answer_type,
      "question.number" => step.page_number,
      "question.is_optional" => step.question.is_optional?,
      "question.is_repeatable" => step.repeatable?,
      "form.submission_type" => form.submission_type,
    }

    current_span.add_attributes(sanitize_attributes(attrs))
  rescue StandardError => e
    report_error(e)
  end

  # Marks the current span as a failed validation and records the error
  # count, the full error messages and the names of the failing attributes.
  #
  # @param step [Object] must respond to question, whose errors object
  #   provides count, full_messages and attribute_names
  # @return [void]
  def self.record_validation_failure(step)
    return unless defined?(OpenTelemetry)

    errors = step.question.errors
    attrs = {
      "validation.failed" => true,
      "validation.error_count" => errors.count,
      "validation.errors" => errors.full_messages.join(", "),
      "validation.error_attributes" => errors.attribute_names.map(&:to_s).join(", "),
    }

    current_span.add_attributes(sanitize_attributes(attrs))
  rescue StandardError => e
    # Don't break the app if telemetry has issues
    report_error(e)
  end

  # Marks the current span as a passed validation.
  #
  # @return [void]
  def self.record_validation_success
    return unless defined?(OpenTelemetry)

    current_span.set_attribute("validation.passed", true)
  rescue StandardError => e
    report_error(e)
  end

  # Runs the given block inside a custom span.
  #
  # Usage: TelemetryService.trace('operation.name', attributes: {...}) { |span| ... }
  #
  # The block is executed exactly once:
  # * when OpenTelemetry is not loaded, or the span cannot be created, the
  #   block receives a NoOpSpan instead of a real span;
  # * an exception raised by the block itself is propagated to the caller
  #   unchanged - it is never swallowed and the block is never re-run;
  # * if the tracer fails after the block has already completed, the failure
  #   is reported and the block's result is still returned.
  #
  # @param span_name [String] name of the span to create
  # @param attributes [Hash] initial span attributes (sanitized before use)
  # @yieldparam span the active span, or a NoOpSpan when tracing is unavailable
  # @return [Object] the block's return value
  def self.trace(span_name, attributes: {}, &block)
    return block.call(NoOpSpan.new) unless defined?(OpenTelemetry)

    block_started = false
    block_finished = false
    result = nil

    begin
      tracer = OpenTelemetry.tracer_provider.tracer("forms-runner")

      tracer.in_span(span_name, attributes: sanitize_attributes(attributes)) do |span, context|
        block_started = true
        result = block.call(span, context)
        block_finished = true
        result
      end
    rescue StandardError => e
      # The caller's own code failed: this is not a telemetry problem, so let
      # it surface as-is rather than re-running the operation.
      raise if block_started && !block_finished

      report_error(e)

      # Tracing broke either after the block completed (keep its result) or
      # before it ever started (run it once, untraced).
      block_finished ? result : block.call(NoOpSpan.new)
    end
  end

  # The span that is active in the current OpenTelemetry context.
  def self.current_span
    OpenTelemetry::Trace.current_span
  end
  private_class_method :current_span

  # Reports a telemetry failure to Sentry when Sentry is loaded; otherwise
  # the error is dropped so telemetry can never break a request.
  def self.report_error(error)
    Sentry.capture_exception(error) if defined?(Sentry)
  end
  private_class_method :report_error

  # Normalises an attribute hash for OpenTelemetry: nil values are removed,
  # keys become strings and values are coerced to primitives.
  def self.sanitize_attributes(attrs)
    attrs.compact.each_with_object({}) do |(key, value), sanitized|
      sanitized[key.to_s] = sanitize_attribute_value(value)
    end
  end
  private_class_method :sanitize_attributes

  # Coerces a single attribute value to a primitive. Strings, integers,
  # floats, booleans and nil pass through unchanged; arrays are joined with
  # ", "; anything else is converted with to_s.
  def self.sanitize_attribute_value(value)
    case value
    when String, Integer, Float, TrueClass, FalseClass, NilClass
      value
    when Array
      value.join(", ")
    else
      value.to_s
    end
  end
  private_class_method :sanitize_attribute_value

  # No-op span that accepts any method call and returns nil.
  # Used as a fallback when tracing is disabled or fails.
  class NoOpSpan
    def method_missing(_method_name, *_args, **_kwargs, &_block)
      # Ignore all method calls (set_attribute, add_event, etc.)
      nil
    end

    def respond_to_missing?(_method_name, _include_private = false)
      true
    end
  end
end
0 commit comments