Skip to content

Commit cd93f76

Browse files
Sai Avalajenkins
authored andcommitted
Suppress netty logs in strato and add serviceidentifier metrics for failed tls handshakes
Seeing a spike in ssl log errors in strato at the moment due to cert rotation upgrade: https://phabricator.twitter.biz/D1264535 Suppress the pkix logs for strato and add serviceidentifier metrics for failed clients so we can identify clients that are failing tls handshakes example: sd.tss-staging.staging.test-server.0:https/tls/ssl_exn/twtr:svc:tss-service:admin-service:staging:atla Differential Revision: https://phabricator.twitter.biz/D1299159
1 parent d8a71ff commit cd93f76

File tree

7 files changed

+503
-27
lines changed

7 files changed

+503
-27
lines changed

finagle-netty4/src/main/scala/com/twitter/finagle/netty4/ssl/server/Netty4ServerSslChannelInitializer.scala

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
package com.twitter.finagle.netty4.ssl.server
22

3-
import com.twitter.finagle.netty4.ssl.{Alpn, Netty4SslHandler}
3+
import com.twitter.finagle.netty4.ssl.Alpn
4+
import com.twitter.finagle.netty4.ssl.Netty4SslHandler
45
import com.twitter.finagle.param.Stats
5-
import com.twitter.finagle.ssl.{ApplicationProtocols, Engine}
6-
import com.twitter.finagle.ssl.server.{
7-
SslServerConfiguration,
8-
SslServerEngineFactory,
9-
SslServerSessionVerifier
10-
}
6+
import com.twitter.finagle.ssl.ApplicationProtocols
7+
import com.twitter.finagle.ssl.Engine
8+
import com.twitter.finagle.ssl.server.SslServerConfiguration
9+
import com.twitter.finagle.ssl.server.SslServerEngineFactory
10+
import com.twitter.finagle.ssl.server.SslServerSessionVerifier
1111
import com.twitter.finagle.transport.Transport
12-
import com.twitter.finagle.{Address, Stack}
13-
import io.netty.channel.{Channel, ChannelInitializer, ChannelPipeline}
12+
import com.twitter.finagle.Address
13+
import com.twitter.finagle.Stack
14+
import io.netty.channel.Channel
15+
import io.netty.channel.ChannelInitializer
16+
import io.netty.channel.ChannelPipeline
1417
import io.netty.handler.ssl.SslHandler
1518
import java.net.InetSocketAddress
1619

@@ -64,7 +67,13 @@ final private[finagle] class Netty4ServerSslChannelInitializer(params: Stack.Par
6467
config: SslServerConfiguration
6568
): SslServerVerificationHandler = {
6669
val sessionVerifier = params[SslServerSessionVerifier.Param].verifier
67-
new SslServerVerificationHandler(sslHandler, remoteAddress, config, sessionVerifier)
70+
val statsReceiver = params[Stats].statsReceiver.scope("tls")
71+
new SslServerVerificationHandler(
72+
sslHandler,
73+
remoteAddress,
74+
config,
75+
sessionVerifier,
76+
statsReceiver)
6877
}
6978

7079
private[this] def addHandlersToPipeline(

finagle-netty4/src/main/scala/com/twitter/finagle/netty4/ssl/server/Netty4ServerSslConfigurations.scala

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,66 @@ private[finagle] object Netty4ServerSslConfigurations {
9090
Netty4SslConfigurations.unwrapTryContextBuilder(builder)
9191
}
9292

93+
/**
94+
* Wraps trust credentials to inject service identifier capturing for servers.
95+
* This allows us to track which clients fail SSL handshakes even when PKIX validation fails.
96+
*
97+
* For CertCollection (the common mTLS case), we convert it to a TrustManagerFactory and wrap it.
98+
* For TrustManagerFactory, we wrap it directly.
99+
* Other types pass through to preserve existing error handling.
100+
*/
101+
private def wrapTrustCredentialsForCapture(
102+
trustCredentials: com.twitter.finagle.ssl.TrustCredentials
103+
): com.twitter.finagle.ssl.TrustCredentials = {
104+
import com.twitter.util.Try
105+
import com.twitter.util.security.X509CertificateFile
106+
107+
trustCredentials match {
108+
case com.twitter.finagle.ssl.TrustCredentials.TrustManagerFactory(factory) =>
109+
com.twitter.finagle.ssl.TrustCredentials.TrustManagerFactory(
110+
new ServiceIdentifierCapturingTrustManagerFactory(factory)
111+
)
112+
113+
case com.twitter.finagle.ssl.TrustCredentials.CertCollection(file) =>
114+
// Convert CertCollection to TrustManagerFactory, then wrap it
115+
// This is the common case for mTLS servers using ca-bundle.crt
116+
// We use X509CertificateFile for validation, matching Netty's behavior
117+
val tmfResult = for {
118+
certs <- new X509CertificateFile(file).readX509Certificates()
119+
tmf <- Try {
120+
val factory = javax.net.ssl.TrustManagerFactory.getInstance(
121+
javax.net.ssl.TrustManagerFactory.getDefaultAlgorithm
122+
)
123+
val ks = java.security.KeyStore.getInstance(java.security.KeyStore.getDefaultType)
124+
ks.load(null, null)
125+
126+
var i = 0
127+
certs.foreach { cert =>
128+
ks.setCertificateEntry(s"cert-$i", cert)
129+
i += 1
130+
}
131+
132+
factory.init(ks)
133+
factory
134+
}
135+
} yield tmf
136+
137+
// If conversion succeeds, wrap it; otherwise pass through original to let Netty handle the error
138+
tmfResult match {
139+
case com.twitter.util.Return(tmf) =>
140+
com.twitter.finagle.ssl.TrustCredentials.TrustManagerFactory(
141+
new ServiceIdentifierCapturingTrustManagerFactory(tmf)
142+
)
143+
case com.twitter.util.Throw(_) =>
144+
// Conversion failed (e.g., empty file, invalid format)
145+
// Pass through original CertCollection to preserve existing error handling
146+
trustCredentials
147+
}
148+
149+
case other => other // Unspecified, Insecure, X509Certificates - pass through
150+
}
151+
}
152+
93153
/**
94154
* Creates an `SslContext` based on the supplied `SslServerConfiguration`. This method uses
95155
* the `KeyCredentials`, `TrustCredentials`, `CipherSuites`, and `ApplicationProtocols` from the provided
@@ -98,7 +158,9 @@ private[finagle] object Netty4ServerSslConfigurations {
98158
def createServerContext(config: SslServerConfiguration, forceJdk: Boolean): SslContext = {
99159
val builder = startServerWithKey(config.keyCredentials)
100160
val withProvider = Netty4SslConfigurations.configureProvider(builder, forceJdk)
101-
val withTrust = Netty4SslConfigurations.configureTrust(withProvider, config.trustCredentials)
161+
// Wrap trust credentials to capture service identifiers during cert validation
162+
val wrappedTrust = wrapTrustCredentialsForCapture(config.trustCredentials)
163+
val withTrust = Netty4SslConfigurations.configureTrust(withProvider, wrappedTrust)
102164
val withCiphers = config.cipherSuites match {
103165
case CipherSuites.Enabled(s) => withTrust.ciphers(s.asJava)
104166
case _ => withTrust
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
package com.twitter.finagle.netty4.ssl.server
2+
3+
import java.net.Socket
4+
import java.security.cert.X509Certificate
5+
import javax.net.ssl.SSLEngine
6+
import javax.net.ssl.X509ExtendedTrustManager
7+
import javax.net.ssl.X509TrustManager
8+
9+
/**
10+
* Context for storing service identifiers extracted during SSL handshake verification.
11+
* This is used to preserve the client service identifier even when certificate validation fails.
12+
*/
13+
object ServiceIdentifierContext {
14+
private val threadLocal = new ThreadLocal[Option[String]]()
15+
16+
def set(serviceId: Option[String]): Unit = threadLocal.set(serviceId)
17+
def get(): Option[String] = Option(threadLocal.get()).flatten
18+
def clear(): Unit = threadLocal.remove()
19+
}
20+
21+
/**
22+
* A trust manager that wraps another trust manager and captures the service identifier
23+
* from client certificates during verification, before validation occurs.
24+
*
25+
* This allows us to track which clients are failing SSL handshakes, even when their
26+
* certificates fail PKIX path building validation.
27+
*/
28+
class ServiceIdentifierCapturingTrustManager(delegate: X509TrustManager)
29+
extends X509ExtendedTrustManager {
30+
31+
/**
32+
* Common logic to capture service identifier and delegate to trust manager.
33+
* Called by all checkClientTrusted overloads.
34+
*/
35+
private def captureAndVerify(chain: Array[X509Certificate])(verify: => Unit): Unit = {
36+
ServiceIdentifierContext.set(ServiceIdentifierExtractor.extractServiceIdentifier(chain))
37+
try {
38+
verify
39+
} finally {
40+
// Don't clear here - let the verification handler retrieve it
41+
}
42+
}
43+
44+
// Base X509TrustManager method
45+
override def checkClientTrusted(chain: Array[X509Certificate], authType: String): Unit = {
46+
captureAndVerify(chain) {
47+
delegate.checkClientTrusted(chain, authType)
48+
}
49+
}
50+
51+
// X509ExtendedTrustManager Socket overload
52+
override def checkClientTrusted(
53+
chain: Array[X509Certificate],
54+
authType: String,
55+
socket: Socket
56+
): Unit = {
57+
captureAndVerify(chain) {
58+
delegate match {
59+
case extDelegate: X509ExtendedTrustManager =>
60+
extDelegate.checkClientTrusted(chain, authType, socket)
61+
case _ =>
62+
delegate.checkClientTrusted(chain, authType)
63+
}
64+
}
65+
}
66+
67+
// X509ExtendedTrustManager SSLEngine overload (used by Netty with OpenSSL)
68+
override def checkClientTrusted(
69+
chain: Array[X509Certificate],
70+
authType: String,
71+
engine: SSLEngine
72+
): Unit = {
73+
captureAndVerify(chain) {
74+
delegate match {
75+
case extDelegate: X509ExtendedTrustManager =>
76+
extDelegate.checkClientTrusted(chain, authType, engine)
77+
case _ =>
78+
delegate.checkClientTrusted(chain, authType)
79+
}
80+
}
81+
}
82+
83+
// Server trust verification - we don't capture service IDs here since we're the server
84+
// These methods validate outbound connections (when the server acts as a client)
85+
override def checkServerTrusted(chain: Array[X509Certificate], authType: String): Unit = {
86+
delegate.checkServerTrusted(chain, authType)
87+
}
88+
89+
override def checkServerTrusted(
90+
chain: Array[X509Certificate],
91+
authType: String,
92+
socket: Socket
93+
): Unit = {
94+
delegate match {
95+
case extDelegate: X509ExtendedTrustManager =>
96+
extDelegate.checkServerTrusted(chain, authType, socket)
97+
case _ =>
98+
delegate.checkServerTrusted(chain, authType)
99+
}
100+
}
101+
102+
override def checkServerTrusted(
103+
chain: Array[X509Certificate],
104+
authType: String,
105+
engine: SSLEngine
106+
): Unit = {
107+
delegate match {
108+
case extDelegate: X509ExtendedTrustManager =>
109+
extDelegate.checkServerTrusted(chain, authType, engine)
110+
case _ =>
111+
delegate.checkServerTrusted(chain, authType)
112+
}
113+
}
114+
115+
override def getAcceptedIssuers: Array[X509Certificate] = delegate.getAcceptedIssuers
116+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package com.twitter.finagle.netty4.ssl.server
2+
3+
import java.security.KeyStore
4+
import javax.net.ssl.ManagerFactoryParameters
5+
import javax.net.ssl.TrustManager
6+
import javax.net.ssl.TrustManagerFactory
7+
import javax.net.ssl.TrustManagerFactorySpi
8+
import javax.net.ssl.X509TrustManager
9+
10+
/**
11+
* A TrustManagerFactory that wraps another factory and injects ServiceIdentifierCapturingTrustManager
12+
* to capture service identifiers during certificate verification.
13+
*/
14+
class ServiceIdentifierCapturingTrustManagerFactory(delegate: TrustManagerFactory)
15+
extends TrustManagerFactory(
16+
new ServiceIdentifierCapturingTrustManagerFactorySpi(delegate),
17+
delegate.getProvider,
18+
delegate.getAlgorithm
19+
)
20+
21+
private class ServiceIdentifierCapturingTrustManagerFactorySpi(delegate: TrustManagerFactory)
22+
extends TrustManagerFactorySpi {
23+
24+
override def engineInit(ks: KeyStore): Unit = {
25+
delegate.init(ks)
26+
}
27+
28+
override def engineInit(spec: ManagerFactoryParameters): Unit = {
29+
delegate.init(spec)
30+
}
31+
32+
override def engineGetTrustManagers(): Array[TrustManager] = {
33+
delegate.getTrustManagers.map {
34+
case x509tm: X509TrustManager =>
35+
new ServiceIdentifierCapturingTrustManager(x509tm)
36+
case other => other
37+
}
38+
}
39+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package com.twitter.finagle.netty4.ssl.server
2+
3+
import java.net.URI
4+
import java.security.cert.X509Certificate
5+
import javax.security.auth.x500.X500Principal
6+
import scala.util.control.NonFatal
7+
8+
/**
9+
* Utility for extracting service identifiers from X.509 certificates.
10+
* Supports both SAN URI (preferred) and Common Name (CN) fallback extraction.
11+
*/
12+
object ServiceIdentifierExtractor {
13+
14+
// SAN type constants from X.509
15+
private val SAN_URI_TYPE = 6
16+
private val SAN_DNS_TYPE = 2
17+
18+
// Regex to match service identifier in Common Name (CN)
19+
private val ServiceIdentifierRegex =
20+
"^(?:twtr:svc:)?([a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+(?::[a-zA-Z0-9_-]+)?(?:\\.[a-zA-Z0-9_-]+)*)$".r
21+
22+
/**
23+
* Extracts service identifier from an X.509 certificate chain.
24+
*
25+
* Extraction order:
26+
* 1. SAN URI (type 6) with scheme "twtr:" (preferred per RFC 5280)
27+
* 2. Common Name (CN) matching service identifier pattern
28+
* 3. Non-mTLS fallback for other CN values
29+
*
30+
* Expected format: twtr:svc:role:service:environment:zone
31+
*
32+
* @param chain Array of certificates (uses first certificate)
33+
* @return Optional service identifier string, or fallback value
34+
*/
35+
def extractServiceIdentifier(chain: Array[X509Certificate]): Option[String] = {
36+
if (chain == null || chain.isEmpty) {
37+
return Some("no_peer_cert")
38+
}
39+
40+
try {
41+
val cert = chain(0)
42+
43+
// Try SAN URI first (preferred per RFC 5280)
44+
val serviceIdFromSan = Option(cert.getSubjectAlternativeNames).flatMap { sans =>
45+
import scala.collection.JavaConverters._
46+
sans.asScala.collectFirst {
47+
case list if list.size() >= 2 && list.get(0) == SAN_URI_TYPE =>
48+
val uriString = list.get(1).toString
49+
try {
50+
val uri = new URI(uriString)
51+
if (uri.getScheme == "twtr" && uri.getSchemeSpecificPart != null) {
52+
Some(uriString) // Returns the full URI: twtr:svc:...
53+
} else None
54+
} catch {
55+
case NonFatal(_) => None
56+
}
57+
}.flatten
58+
}
59+
60+
serviceIdFromSan match {
61+
case Some(id) => Some(id)
62+
case None =>
63+
// Fallback to Common Name (CN)
64+
val dn = cert.getSubjectX500Principal.getName(X500Principal.RFC2253)
65+
val cn = dn
66+
.split(',')
67+
.find(_.trim.startsWith("CN="))
68+
.map(_.trim.substring(3))
69+
70+
cn.flatMap {
71+
case id if id.startsWith("twtr:svc:") => Some(id) // Already has prefix
72+
case ServiceIdentifierRegex(id) => Some(s"twtr:svc:$id") // Add prefix
73+
case other => Some(s"non_mtls:${other.take(100)}")
74+
}.orElse(Some(s"non_mtls:${dn.take(100)}"))
75+
}
76+
} catch {
77+
case NonFatal(_) => Some("extraction_failed")
78+
}
79+
}
80+
}

0 commit comments

Comments
 (0)