@@ -85,11 +85,16 @@ local function build_cluster_resource(cluster_name, options)
8585 local resource = {
8686 [" @type" ] = " type.googleapis.com/envoy.config.cluster.v3.Cluster" ,
8787 name = cluster_name ,
88- type = " STRICT_DNS" ,
88+ cluster_type = {
89+ name = " envoy.clusters.strict_dns" ,
90+ typed_config = {
91+ [" @type" ] = " type.googleapis.com/envoy.extensions.clusters.dns.v3.DnsCluster" ,
92+ typed_dns_resolver_config = dns_resolver_config ,
93+ respect_dns_ttl = true ,
94+ },
95+ },
8996 wait_for_warm_on_init = false ,
90- typed_dns_resolver_config = dns_resolver_config ,
9197 dns_lookup_family = dns_lookup_family ,
92- respect_dns_ttl = true ,
9398 ignore_health_on_host_removal = true ,
9499 load_assignment = {
95100 cluster_name = cluster_name ,
@@ -102,11 +107,22 @@ local function build_cluster_resource(cluster_name, options)
102107 typed_extension_protocol_options = {
103108 [" envoy.extensions.upstreams.http.v3.HttpProtocolOptions" ] = {
104109 [" @type" ] = " type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions" ,
105- upstream_http_protocol_options = {
106- auto_sni = true ,
110+ -- Make all API backend requests over HTTP 1.1 (instead of HTTP 2 or
111+ -- 3). Since our nginx layer already downgraded any proxied requests to
112+ -- HTTP 1.1 (since nginx doesn't currently support proxying via other
113+ -- versions), it seems simplest to stick with that approach (even
114+ -- though Envoy could proxy using a different version).
115+ explicit_http_config = {
116+ http_protocol_options = {},
107117 },
108118 common_http_protocol_options = {
109- idle_timeout = " 1s" ,
119+ -- Idle timeout for keepalive connections to upstream servers (API
120+ -- backends).
121+ --
122+ -- Since API backends can be remote, keepalive connections can be
123+ -- important to improving performance by keeping pre-established
124+ -- connections around.
125+ idle_timeout = file_config [" router" ][" api_backends" ][" keepalive_idle_timeout" ] .. " s" ,
110126 },
111127 },
112128 },
@@ -120,19 +136,16 @@ local function build_cluster_resource(cluster_name, options)
120136 },
121137 }
122138
123- -- Use the "negative_ttl" time as Envoy's DNS refresh rate. Since we have
124- -- "respect_dns_ttl" enabled, successful DNS requests will use that refresh
125- -- rate instead of this one. So effectively the "dns_refresh_rate" should
126- -- only be used in failure situations, so we can use this to provide a TTL
127- -- for negative responses.
128- --
129- -- Envoy also supports the more explicit "dns_failure_refresh_rate" option,
130- -- but that includes an exponential backoff algorithm, with random jitter,
131- -- making it harder to test against. So to replicate how our "negative_ttl"
132- -- has worked under other DNS situations, we will use this "dns_refresh_rate"
133- -- (which doesn't do backoff or jitter).
139+ -- Use the "negative_ttl" time as Envoy's DNS refresh rate when failures
140+ -- occur (via "dns_failure_refresh_rate"). Since we have "respect_dns_ttl"
141+ -- enabled, successful DNS responses refresh on their own TTL, so this rate
142+ -- only applies after failed lookups — effectively a TTL for negative
143+ -- responses. base_interval equals max_interval to avoid backoff/jitter.
134144 if file_config [" dns_resolver" ][" negative_ttl" ] then
135- resource [" dns_refresh_rate" ] = file_config [" dns_resolver" ][" negative_ttl" ] .. " s"
145+ resource [" cluster_type" ][" typed_config" ][" dns_failure_refresh_rate" ] = {
146+ base_interval = file_config [" dns_resolver" ][" negative_ttl" ] .. " s" ,
147+ max_interval = file_config [" dns_resolver" ][" negative_ttl" ] .. " s" ,
148+ }
136149 end
137150
138151 local servers
@@ -300,7 +313,17 @@ local function build_listener()
300313 stat_prefix = " router" ,
301314 common_http_protocol_options = {
302315 max_headers_count = 200 ,
303- idle_timeout = " 15s" ,
316+ -- Idle timeout for keepalive connections to downstream server
317+ -- (Traffic Server).
318+ --
319+ -- We set this slightly longer than Traffic Server's own idle
320+ -- timeout, since Traffic Server should really be responsible for
321+ -- closing its own connections, so this shouldn't normally kick in.
322+ -- However, we will still add a timeout here since we've seen
323+ -- cases where Traffic Server doesn't close idle connections as
324+ -- expected (like if Traffic Server's
325+ -- `http.per_server.connection.min` setting is set).
326+ -- NOTE(review): the assignment below is commented out, so no
326+ -- idle_timeout is actually applied here — confirm whether disabling
326+ -- it (contradicting the comment above) is intentional.
326+ -- idle_timeout = file_config["trafficserver"]["records"]["http"]["keep_alive_no_activity_timeout_out"] + 5 .. "s"
304327 },
305328 generate_request_id = false ,
306329 server_header_transformation = " PASS_THROUGH" ,
0 commit comments