Skip to content

Commit 476256d

Browse files
authored
Merge pull request #5320 from DataDog/appsec-60450-add-parsed-body-address-to-downstream-request
[APPSEC-60450] Add parsed body address to downstream request analysis
2 parents 1fdc59c + ff906db commit 476256d

35 files changed

+1354
-186
lines changed

lib/datadog/appsec/configuration/settings.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,20 @@ def self.add_settings!(base)
393393
end
394394
end
395395
end
396+
397+
# Settings for the analysis of downstream HTTP request/response bodies.
settings :downstream_body_analysis do
398+
# Rate passed to the per-context CounterSampler that decides which
# downstream requests have their bodies analyzed. Defaults to 0.5.
option :sample_rate do |o|
399+
o.type :float
400+
o.env 'DD_API_SECURITY_DOWNSTREAM_BODY_ANALYSIS_SAMPLE_RATE'
401+
o.default 0.5
402+
end
403+
404+
# Upper bound compared against the context's
# `downstream_body_analyzed_count`; once reached, no further downstream
# bodies are analyzed for that context. Defaults to 1.
option :max_requests do |o|
405+
o.type :int
406+
o.env 'DD_API_SECURITY_MAX_DOWNSTREAM_REQUEST_BODY_ANALYSIS'
407+
o.default 1
408+
end
409+
end
396410
end
397411

398412
option :sca_enabled do |o|

lib/datadog/appsec/context.rb

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# frozen_string_literal: true
22

3+
require_relative 'counter_sampler'
34
require_relative 'metrics'
45

56
module Datadog
@@ -27,6 +28,9 @@ class Context
2728
# it's a `Hash`-like structure.
2829
attr_reader :state
2930

31+
# Sampler for downstream HTTP request/response body analysis.
32+
attr_reader :downstream_body_sampler
33+
3034
class << self
3135
def activate(context)
3236
raise ArgumentError, 'not a Datadog::AppSec::Context' unless context.instance_of?(Context)
@@ -51,9 +55,13 @@ def initialize(trace, span, waf_runner)
5155
@span = span
5256
@waf_runner = waf_runner
5357
@metrics = Metrics::Collector.new
58+
@downstream_body_sampler = CounterSampler.new(
59+
Datadog.configuration.appsec.api_security.downstream_body_analysis.sample_rate
60+
)
5461
@state = {
5562
events: [],
56-
interrupted: false
63+
interrupted: false,
64+
downstream_body_analyzed_count: 0
5765
}
5866
end
5967

lib/datadog/appsec/contrib/excon/ssrf_detection_middleware.rb

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,36 @@
55
require_relative '../../event'
66
require_relative '../../trace_keeper'
77
require_relative '../../security_event'
8+
require_relative '../../utils/http/url_encoded'
require_relative '../../utils/http/media_type'
9+
require_relative '../../utils/http/body'
810

911
module Datadog
1012
module AppSec
1113
module Contrib
1214
module Excon
1315
# AppSec Middleware for Excon
1416
class SSRFDetectionMiddleware < ::Excon::Middleware::Base
17+
SAMPLE_BODY_KEY = :__datadog_appsec_sample_downstream_body
18+
1519
def request_call(data)
1620
context = AppSec.active_context
1721
return super unless context && AppSec.rasp_enabled?
1822

19-
timeout = Datadog.configuration.appsec.waf_timeout
23+
mark_body_sampling!(data, context: context)
24+
25+
headers = normalize_headers(data[:headers])
26+
# @type var ephemeral_data: ::Datadog::AppSec::Context::input_data
2027
ephemeral_data = {
2128
'server.io.net.url' => request_url(data),
2229
'server.io.net.request.method' => data[:method].to_s.upcase,
23-
'server.io.net.request.headers' => normalize_headers(data[:headers])
30+
'server.io.net.request.headers' => headers
2431
}
2532

33+
if data[SAMPLE_BODY_KEY] && (body = parse_body(data[:body], content_type: headers['content-type']))
34+
ephemeral_data['server.io.net.request.body'] = body
35+
end
36+
37+
timeout = Datadog.configuration.appsec.waf_timeout
2638
result = context.run_rasp(Ext::RASP_SSRF, {}, ephemeral_data, timeout, phase: Ext::RASP_REQUEST_PHASE)
2739
handle(result, context: context) if result.match?
2840

@@ -33,12 +45,18 @@ def response_call(data)
3345
context = AppSec.active_context
3446
return super unless context && AppSec.rasp_enabled?
3547

36-
timeout = Datadog.configuration.appsec.waf_timeout
48+
headers = normalize_headers(data.dig(:response, :headers))
49+
# @type var ephemeral_data: ::Datadog::AppSec::Context::input_data
3750
ephemeral_data = {
3851
'server.io.net.response.status' => data.dig(:response, :status).to_s,
39-
'server.io.net.response.headers' => normalize_headers(data.dig(:response, :headers))
52+
'server.io.net.response.headers' => headers
4053
}
4154

55+
if data[SAMPLE_BODY_KEY] && (body = parse_body(data.dig(:response, :body), content_type: headers['content-type']))
56+
ephemeral_data['server.io.net.response.body'] = body
57+
end
58+
59+
timeout = Datadog.configuration.appsec.waf_timeout
4260
result = context.run_rasp(Ext::RASP_SSRF, {}, ephemeral_data, timeout, phase: Ext::RASP_RESPONSE_PHASE)
4361
handle(result, context: context) if result.match?
4462

@@ -47,6 +65,22 @@ def response_call(data)
4765

4866
private
4967

68+
# Flags the Excon datum for body analysis.
#
# The datum is flagged only while the context's analyzed-body count is
# below the configured `max_requests` and the context's sampler accepts
# this request; the count is incremented when the flag is set. The flag is
# read back through `data[SAMPLE_BODY_KEY]` by `request_call` and
# `response_call`.
#
# @param data [Hash] Excon datum, mutated in place
# @param context [Datadog::AppSec::Context] active AppSec context
# @return [true, nil] `true` when the datum was flagged, `nil` otherwise
def mark_body_sampling!(data, context:)
  limit = Datadog.configuration.appsec.api_security.downstream_body_analysis.max_requests
  state = context.state

  # The sampler is consulted only when there is still budget left.
  if state[:downstream_body_analyzed_count] < limit && context.downstream_body_sampler.sample?
    state[:downstream_body_analyzed_count] += 1
    data[SAMPLE_BODY_KEY] = true
  end
end
76+
77+
# Parses a body according to its Content-Type header value.
#
# @param body [Object] raw body as found in the Excon datum
# @param content_type [String, nil] value of the `content-type` header
# @return [Object, nil] result of `Utils::HTTP::Body.parse`, or `nil` when
#   the content type could not be turned into a media type
def parse_body(body, content_type:)
  parsed_type = Utils::HTTP::MediaType.parse(content_type)

  Utils::HTTP::Body.parse(body, media_type: parsed_type) if parsed_type
end
83+
5084
def request_url(data)
5185
klass = (data[:scheme] == 'https') ? URI::HTTPS : URI::HTTP
5286
klass.build(host: data[:host], path: data[:path], query: data[:query]).to_s

lib/datadog/appsec/contrib/faraday/ssrf_detection_middleware.rb

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,36 @@
33
require_relative '../../event'
44
require_relative '../../trace_keeper'
55
require_relative '../../security_event'
6+
require_relative '../../utils/http/media_type'
7+
require_relative '../../utils/http/body'
68

79
module Datadog
810
module AppSec
911
module Contrib
1012
module Faraday
1113
# AppSec SSRF detection Middleware for Faraday
1214
class SSRFDetectionMiddleware < ::Faraday::Middleware
15+
SAMPLE_BODY_KEY = :__datadog_appsec_sample_downstream_body
16+
1317
def call(env)
1418
context = AppSec.active_context
1519
return @app.call(env) unless context && AppSec.rasp_enabled?
1620

17-
timeout = Datadog.configuration.appsec.waf_timeout
21+
mark_body_sampling!(env, context: context)
22+
23+
headers = normalize_headers(env.request_headers)
24+
# @type var ephemeral_data: ::Datadog::AppSec::Context::input_data
1825
ephemeral_data = {
1926
'server.io.net.url' => env.url.to_s,
2027
'server.io.net.request.method' => env.method.to_s.upcase,
21-
'server.io.net.request.headers' => env.request_headers.transform_keys(&:downcase)
28+
'server.io.net.request.headers' => headers
2229
}
2330

31+
if env[SAMPLE_BODY_KEY] && (body = parse_body(env.body, content_type: headers['content-type']))
32+
ephemeral_data['server.io.net.request.body'] = body
33+
end
34+
35+
timeout = Datadog.configuration.appsec.waf_timeout
2436
result = context.run_rasp(Ext::RASP_SSRF, {}, ephemeral_data, timeout, phase: Ext::RASP_REQUEST_PHASE)
2537
handle(result, context: context) if result.match?
2638

@@ -30,18 +42,44 @@ def call(env)
3042
private
3143

3244
def on_complete(env, context:)
33-
timeout = Datadog.configuration.appsec.waf_timeout
34-
35-
response_headers = env.response_headers || {}
45+
headers = normalize_headers(env.response_headers)
46+
# @type var ephemeral_data: ::Datadog::AppSec::Context::input_data
3647
ephemeral_data = {
3748
'server.io.net.response.status' => env.status.to_s,
38-
'server.io.net.response.headers' => response_headers.transform_keys(&:downcase)
49+
'server.io.net.response.headers' => headers
3950
}
4051

52+
if env[SAMPLE_BODY_KEY] && (body = parse_body(env.body, content_type: headers['content-type']))
53+
ephemeral_data['server.io.net.response.body'] = body
54+
end
55+
56+
timeout = Datadog.configuration.appsec.waf_timeout
4157
result = context.run_rasp(Ext::RASP_SSRF, {}, ephemeral_data, timeout, phase: Ext::RASP_RESPONSE_PHASE)
4258
handle(result, context: context) if result.match?
4359
end
4460

61+
# Flags the Faraday env for body analysis.
#
# The env is flagged only while the context's analyzed-body count is below
# the configured `max_requests` and the context's sampler accepts this
# request; the count is incremented when the flag is set. The flag is read
# back through `env[SAMPLE_BODY_KEY]` by `call` and `on_complete`.
#
# @param env [Faraday::Env] request environment, mutated in place
# @param context [Datadog::AppSec::Context] active AppSec context
# @return [true, nil] `true` when the env was flagged, `nil` otherwise
def mark_body_sampling!(env, context:)
  limit = Datadog.configuration.appsec.api_security.downstream_body_analysis.max_requests
  state = context.state

  # The sampler is consulted only when there is still budget left.
  if state[:downstream_body_analyzed_count] < limit && context.downstream_body_sampler.sample?
    state[:downstream_body_analyzed_count] += 1
    env[SAMPLE_BODY_KEY] = true
  end
end
69+
70+
# Parses a body according to its Content-Type header value.
#
# @param body [Object] raw body taken from the Faraday env
# @param content_type [String, nil] value of the `content-type` header
# @return [Object, nil] result of `Utils::HTTP::Body.parse`, or `nil` when
#   the content type could not be turned into a media type
def parse_body(body, content_type:)
  parsed_type = Utils::HTTP::MediaType.parse(content_type)

  Utils::HTTP::Body.parse(body, media_type: parsed_type) if parsed_type
end
76+
77+
# Returns a copy of the headers with every key downcased.
#
# @param headers [Hash, nil] header name => value pairs
# @return [Hash] new hash with downcased keys; empty hash for nil/empty input
def normalize_headers(headers)
  if headers.nil? || headers.empty?
    {}
  else
    headers.transform_keys(&:downcase)
  end
end
82+
4583
def handle(result, context:)
4684
AppSec::Event.tag(context, result)
4785
TraceKeeper.keep!(context.trace) if result.keep?

lib/datadog/appsec/contrib/rack/gateway/request.rb

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,12 @@ def request
2323
end
2424

2525
def query
26-
# Downstream libddwaf expects keys and values to be extractable
27-
# separately so we can't use [[k, v], ...]. We also want to allow
28-
# duplicate keys, so we use {k => [v, ...], ...} instead, taking into
29-
# account that {k => [v1, v2, ...], ...} is possible for duplicate keys.
30-
request.query_string.split('&').each.with_object({}) do |e, hash|
31-
k, v = e.split('=').map { |s| CGI.unescape(s) }
32-
hash[k] ||= []
33-
34-
hash[k] << v
35-
end
26+
::Rack::Utils.parse_query(request.query_string)
27+
rescue => e
28+
Datadog.logger.debug { "AppSec: Failed to parse request query string: #{e.class}: #{e.message}" }
29+
AppSec.telemetry.report(e, description: 'AppSec: Failed to parse request query string')
30+
31+
{}
3632
end
3733

3834
def method

lib/datadog/appsec/contrib/rest_client/request_ssrf_detection_patch.rb

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
require_relative '../../event'
44
require_relative '../../trace_keeper'
55
require_relative '../../security_event'
6+
require_relative '../../utils/http/media_type'
7+
require_relative '../../utils/http/body'
68

79
module Datadog
810
module AppSec
@@ -14,23 +16,36 @@ def execute(&block)
1416
context = AppSec.active_context
1517
return super unless context && AppSec.rasp_enabled?
1618

17-
timeout = Datadog.configuration.appsec.waf_timeout
19+
headers = normalize_request_headers
20+
# @type var ephemeral_data: ::Datadog::AppSec::Context::input_data
1821
ephemeral_data = {
1922
'server.io.net.url' => url,
2023
'server.io.net.request.method' => method.to_s.upcase,
21-
'server.io.net.request.headers' => normalize_request_headers
24+
'server.io.net.request.headers' => headers
2225
}
2326

27+
sample_body = mark_body_sampling!(context)
28+
if sample_body && (body = parse_body(payload.to_s, content_type: headers['content-type']))
29+
ephemeral_data['server.io.net.request.body'] = body
30+
end
31+
32+
timeout = Datadog.configuration.appsec.waf_timeout
2433
result = context.run_rasp(Ext::RASP_SSRF, {}, ephemeral_data, timeout, phase: Ext::RASP_REQUEST_PHASE)
2534
handle(result, context: context) if result.match?
2635

2736
response = super
2837

38+
headers = normalize_response_headers(response)
39+
# @type var ephemeral_data: ::Datadog::AppSec::Context::input_data
2940
ephemeral_data = {
3041
'server.io.net.response.status' => response.code.to_s,
31-
'server.io.net.response.headers' => normalize_response_headers(response)
42+
'server.io.net.response.headers' => headers
3243
}
3344

45+
if sample_body && (body = parse_body(response.body, content_type: headers['content-type']))
46+
ephemeral_data['server.io.net.response.body'] = body
47+
end
48+
3449
result = context.run_rasp(Ext::RASP_SSRF, {}, ephemeral_data, timeout, phase: Ext::RASP_RESPONSE_PHASE)
3550
handle(result, context: context) if result.match?
3651

@@ -39,6 +54,24 @@ def execute(&block)
3954

4055
private
4156

57+
# Decides whether the bodies of the current downstream request are analyzed.
#
# Analysis is granted only while the context's analyzed-body count is below
# the configured `max_requests` and the context's sampler accepts this
# request; the count is incremented when analysis is granted.
#
# @param context [Datadog::AppSec::Context] active AppSec context
# @return [Boolean] `true` when bodies should be analyzed
def mark_body_sampling!(context)
  limit = Datadog.configuration.appsec.api_security.downstream_body_analysis.max_requests
  state = context.state

  within_budget = state[:downstream_body_analyzed_count] < limit
  # The sampler is consulted only when there is still budget left.
  return false unless within_budget && context.downstream_body_sampler.sample?

  state[:downstream_body_analyzed_count] += 1
  true
end
65+
66+
# Parses a body according to its Content-Type header value.
#
# A nil or empty body is skipped before any media-type parsing so that a
# missing body can never raise inside the instrumented HTTP call.
#
# @param body [String, nil] raw request payload or response body
# @param content_type [String, nil] value of the `content-type` header
# @return [Object, nil] result of `Utils::HTTP::Body.parse`, or `nil` when
#   the body is nil/empty or the content type yields no media type
def parse_body(body, content_type:)
  return if body.nil? || body.empty?

  media_type = Utils::HTTP::MediaType.parse(content_type)
  return unless media_type

  Utils::HTTP::Body.parse(body, media_type: media_type)
end
74+
4275
# NOTE: Starting version 2.1.0 headers are already normalized via internal
4376
# variable `@processed_headers_lowercase`. In case it's available,
4477
# we use it to avoid unnecessary transformation.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# frozen_string_literal: true
2+
3+
require_relative '../core/knuth_sampler'
4+
5+
module Datadog
  module AppSec
    # Deterministic sampler driven by a call counter.
    #
    # Every {#sample?} call advances an internal counter and hands the new
    # value to the wrapped `Core::KnuthSampler`, which makes the decision.
    #
    # @api private
    class CounterSampler
      # @param rate [Float] sampling rate forwarded to `Core::KnuthSampler`
      def initialize(rate = 1.0)
        @counter = 0
        @sampler = Core::KnuthSampler.new(rate)
      end

      # @return [Object] whatever `Core::KnuthSampler#sample?` returns for
      #   the incremented counter value
      def sample?
        @sampler.sample?(@counter += 1)
      end
    end
  end
end
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# frozen_string_literal: true
2+
3+
require 'json'
4+
require 'cgi'
5+
6+
require_relative 'url_encoded'
7+
8+
module Datadog
  module AppSec
    module Utils
      module HTTP
        # Module for handling HTTP body parsing.
        #
        # Only JSON (`json` and `*+json` subtypes) and `x-www-form-urlencoded`
        # payloads are parsed; any other subtype yields `nil`.
        module Body
          # Parses an HTTP body according to the given media type.
          #
          # @param body [String, #read, nil] raw payload or an IO-like object
          # @param media_type [#subtype] parsed media type of the payload
          # @return [Object, nil] parsed payload; `nil` when the body is nil,
          #   empty, not textual, of an unsupported subtype, or when parsing
          #   fails (the failure is reported to AppSec telemetry)
          def self.parse(body, media_type:)
            return if body.nil?

            content = read_content(body)
            # Anything that is not serialized text (e.g. a Hash that has not
            # been encoded yet) cannot be parsed; skip it instead of letting
            # the parser raise and reporting that as an internal error.
            return unless content.is_a?(::String) && !content.empty?

            subtype = media_type.subtype
            if subtype == 'json' || subtype.end_with?('+json')
              JSON.parse(content)
            elsif subtype == 'x-www-form-urlencoded'
              URLEncoded.parse(content)
            end
          rescue => e
            AppSec.telemetry.report(e, description: 'AppSec: Failed to parse body')

            nil
          end

          # Returns the body content, reading IO-like objects from the start.
          #
          # Rewindable bodies are rewound before reading and again afterwards,
          # even when reading raises, so the caller's stream is not left
          # consumed.
          def self.read_content(body)
            rewindable = body.respond_to?(:rewind)
            body.rewind if rewindable # steep:ignore NoMethod

            begin
              body.respond_to?(:read) ? body.read : body # steep:ignore NoMethod
            ensure
              body.rewind if rewindable # steep:ignore NoMethod
            end
          end
          private_class_method :read_content
        end
      end
    end
  end
end

lib/datadog/appsec/utils/http/media_range.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ def <=>(other)
168168
#
169169
# returns true if the MediaType is accepted by this MediaRange
170170
def ===(other)
171-
return self === MediaType.new(other) if other.is_a?(::String)
171+
return false if other.nil?
172+
return self === MediaType.parse(other) if other.is_a?(::String)
172173

173174
type == other.type && subtype == other.subtype && other.parameters.all? { |k, v| parameters[k] == v } ||
174175
type == other.type && wildcard?(:subtype) ||

0 commit comments

Comments
 (0)