Skip to content

Commit dca1a2e

Browse files
authored
Merge pull request #241 from emeraldpay/fix/grpc-status-disconnects
2 parents 61aea11 + efa4809 commit dca1a2e

File tree

3 files changed

+146
-109
lines changed

3 files changed

+146
-109
lines changed

src/main/kotlin/io/emeraldpay/dshackle/startup/ConfiguredUpstreams.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,11 +340,12 @@ open class ConfiguredUpstreams(
340340
this.options = options
341341
}
342342
log.info("Using ALL CHAINS (gRPC) upstream, at ${endpoint.host}:${endpoint.port}")
343-
ds.start()
343+
ds.subscribeUpstreamChanges()
344344
.doOnNext {
345345
log.info("Chain ${it.chain} ${it.type} through gRPC at ${endpoint.host}:${endpoint.port}. With caps: ${it.upstream.getCapabilities()}")
346346
}
347347
.subscribe(currentUpstreams::update)
348+
ds.startStatusUpdates()
348349
}
349350

350351
private fun buildHttpClient(config: UpstreamsConfig.Upstream<out UpstreamsConfig.RpcConnection>): JsonRpcHttpClient? {

src/main/kotlin/io/emeraldpay/dshackle/upstream/grpc/GrpcHead.kt

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,23 @@ package io.emeraldpay.dshackle.upstream.grpc
1818
import io.emeraldpay.api.proto.BlockchainOuterClass
1919
import io.emeraldpay.api.proto.Common
2020
import io.emeraldpay.api.proto.ReactorBlockchainGrpc
21-
import io.emeraldpay.dshackle.Defaults
21+
import io.emeraldpay.dshackle.SilentException
22+
import io.emeraldpay.dshackle.commons.DurableFlux
2223
import io.emeraldpay.dshackle.data.BlockContainer
2324
import io.emeraldpay.dshackle.upstream.AbstractHead
2425
import io.emeraldpay.dshackle.upstream.DefaultUpstream
2526
import io.emeraldpay.dshackle.upstream.UpstreamAvailability
26-
import io.emeraldpay.etherjar.rpc.RpcException
27+
import io.emeraldpay.grpc.BlockchainType
2728
import io.emeraldpay.grpc.Chain
2829
import org.reactivestreams.Publisher
2930
import org.slf4j.LoggerFactory
3031
import org.springframework.context.Lifecycle
32+
import org.springframework.util.backoff.ExponentialBackOff
3133
import reactor.core.Disposable
3234
import reactor.core.publisher.Flux
3335
import reactor.core.publisher.Mono
34-
import reactor.util.retry.Retry
3536
import java.time.Duration
37+
import java.util.concurrent.atomic.AtomicBoolean
3638
import java.util.function.Function
3739

3840
class GrpcHead(
@@ -54,6 +56,7 @@ class GrpcHead(
5456
}
5557

5658
private var headSubscription: Disposable? = null
59+
private val shouldBeRunning = AtomicBoolean(false)
5760

5861
/**
5962
* Initiate a new head subscription with connection to the remote
@@ -64,51 +67,54 @@ class GrpcHead(
6467
}
6568
log.debug("Start Head subscription to ${parent.getId()}")
6669

67-
val source = Flux.concat(
68-
// first connect immediately
69-
Flux.just(remote),
70-
// following requests do with delay, give it a time to recover
71-
Flux.just(remote).repeat().delayElements(Defaults.retryConnection)
72-
).flatMap(this::subscribeHead)
73-
74-
internalStart(source)
70+
val blocks = DurableFlux(
71+
{ connect(remote) },
72+
ExponentialBackOff(100, 1.5),
73+
log,
74+
shouldBeRunning,
75+
)
76+
headSubscription = super.follow(blocks.connect())
7577
}
7678

77-
fun subscribeHead(client: ReactorBlockchainGrpc.ReactorBlockchainStub): Publisher<BlockchainOuterClass.ChainHead> {
79+
private fun connect(remote: ReactorBlockchainGrpc.ReactorBlockchainStub): Flux<BlockContainer> {
7880
val chainRef = Common.Chain.newBuilder()
7981
.setTypeValue(chain.id)
8082
.build()
81-
return client.subscribeHead(chainRef)
82-
// simple retry on failure, if eventually failed then it supposed to resubscribe later from outer method
83-
.retryWhen(Retry.backoff(4, Duration.ofSeconds(1)))
84-
.onErrorContinue { err, _ ->
85-
log.warn("Disconnected $chain from ${parent.getId()}: ${err.message}")
83+
return remote.subscribeHead(chainRef)
84+
// if nothing is received for a relatively long period, the connection is probably broken, so in that case we force the connection to drop
85+
.timeout(
86+
expectEventsTime(),
87+
Mono.fromCallable { log.info("No events received from ${parent.getId()}. Reconnecting...") }
88+
.then(Mono.error(SilentException.Timeout("No Events")))
89+
)
90+
.doOnError { err ->
91+
if (err !is SilentException) {
92+
log.warn("Disconnected $chain from ${parent.getId()}: ${err.message}")
93+
}
8694
parent.setStatus(UpstreamAvailability.UNAVAILABLE)
87-
Mono.empty<BlockchainOuterClass.ChainHead>()
8895
}
96+
.map(converter)
97+
.distinctUntilChanged(BlockContainer::hash)
98+
.transform(enhanced())
8999
}
90100

91-
/**
92-
* Initiate a new head from provided source of head details
93-
*/
94-
private fun internalStart(source: Flux<BlockchainOuterClass.ChainHead>) {
95-
var blocks = source.map(converter)
96-
.distinctUntilChanged {
97-
it.hash
101+
private fun expectEventsTime(): Duration {
102+
return try {
103+
when (BlockchainType.from(chain)) {
104+
BlockchainType.BITCOIN -> Duration.ofHours(1)
105+
BlockchainType.ETHEREUM -> Duration.ofMinutes(5)
98106
}
99-
if (enhancer != null) {
100-
blocks = blocks.flatMap(enhancer)
107+
} catch (e: IllegalArgumentException) {
108+
Duration.ofMinutes(15)
101109
}
110+
}
102111

103-
blocks = blocks.onErrorContinue { err, _ ->
104-
if (err is RpcException) {
105-
log.error("Head subscription error on ${parent.getId()}. ${err.javaClass.name}:${err.message}")
106-
} else {
107-
log.error("Head subscription error on ${parent.getId()}. ${err.javaClass.name}:${err.message}", err)
108-
}
112+
private fun enhanced(): Function<Flux<BlockContainer>, Flux<BlockContainer>> {
113+
return if (enhancer != null) {
114+
Function { blocks -> blocks.flatMap(enhancer) }
115+
} else {
116+
Function.identity()
109117
}
110-
111-
headSubscription = super.follow(blocks)
112118
}
113119

114120
override fun isRunning(): Boolean {
@@ -117,10 +123,12 @@ class GrpcHead(
117123

118124
override fun start() {
119125
headSubscription?.dispose()
126+
shouldBeRunning.set(true)
120127
this.internalStart(remote)
121128
}
122129

123130
override fun stop() {
131+
shouldBeRunning.set(false)
124132
headSubscription?.dispose()
125133
}
126134
}

src/main/kotlin/io/emeraldpay/dshackle/upstream/grpc/GrpcUpstreams.kt

Lines changed: 101 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package io.emeraldpay.dshackle.upstream.grpc
1919
import io.emeraldpay.api.proto.BlockchainOuterClass
2020
import io.emeraldpay.api.proto.ReactorBlockchainGrpc
2121
import io.emeraldpay.dshackle.FileResolver
22+
import io.emeraldpay.dshackle.commons.DurableFlux
2223
import io.emeraldpay.dshackle.config.AuthConfig
2324
import io.emeraldpay.dshackle.config.UpstreamsConfig
2425
import io.emeraldpay.dshackle.monitoring.Channel
@@ -46,12 +47,15 @@ import io.netty.handler.ssl.SslContextBuilder
4647
import org.apache.commons.lang3.StringUtils
4748
import org.apache.commons.lang3.exception.ExceptionUtils
4849
import org.slf4j.LoggerFactory
50+
import org.springframework.util.backoff.ExponentialBackOff
4951
import reactor.core.Disposable
5052
import reactor.core.publisher.Flux
53+
import reactor.core.scheduler.Schedulers
5154
import java.net.ConnectException
5255
import java.time.Duration
53-
import java.util.concurrent.atomic.AtomicReference
56+
import java.util.concurrent.atomic.AtomicBoolean
5457
import java.util.concurrent.locks.ReentrantLock
58+
import java.util.function.Supplier
5559
import kotlin.concurrent.withLock
5660

5761
class GrpcUpstreams(
@@ -66,40 +70,79 @@ class GrpcUpstreams(
6670

6771
var options = UpstreamsConfig.PartialOptions.getDefaults().build()
6872

69-
private var client: ReactorBlockchainGrpc.ReactorBlockchainStub? = null
73+
private var clientValue: ReactorBlockchainGrpc.ReactorBlockchainStub? = null
7074
private val known = HashMap<Chain, DefaultUpstream>()
7175
private val lock = ReentrantLock()
7276

73-
fun start(): Flux<UpstreamChange> {
74-
val channel: ManagedChannelBuilder<*> = if (conn.auth != null && StringUtils.isNotEmpty(conn.auth!!.ca)) {
75-
NettyChannelBuilder.forAddress(conn.host, conn.port)
76-
// some messages are very large. many of them in megabytes, some even in gigabytes (ex. ETH Traces)
77-
.maxInboundMessageSize(Int.MAX_VALUE)
78-
.useTransportSecurity()
79-
.enableRetry()
80-
.maxRetryAttempts(3)
81-
.sslContext(withTls(conn.auth!!))
82-
} else {
83-
ManagedChannelBuilder.forAddress(conn.host, conn.port)
84-
.let {
85-
if (conn.autoTls == true) {
86-
it.useTransportSecurity()
87-
} else {
88-
log.warn("Using insecure connection to ${conn.host}:${conn.port}")
89-
it.usePlaintext()
77+
private val client: ReactorBlockchainGrpc.ReactorBlockchainStub
78+
get() {
79+
if (clientValue != null) {
80+
return clientValue!!
81+
}
82+
val channel: ManagedChannelBuilder<*> = if (conn.auth != null && StringUtils.isNotEmpty(conn.auth!!.ca)) {
83+
NettyChannelBuilder.forAddress(conn.host, conn.port)
84+
// some messages are very large: many of them are megabytes in size, some even gigabytes (e.g. ETH traces)
85+
.maxInboundMessageSize(Int.MAX_VALUE)
86+
.useTransportSecurity()
87+
.enableRetry()
88+
.maxRetryAttempts(3)
89+
.sslContext(withTls(conn.auth!!))
90+
} else {
91+
ManagedChannelBuilder.forAddress(conn.host, conn.port)
92+
.let {
93+
if (conn.autoTls == true) {
94+
it.useTransportSecurity()
95+
} else {
96+
log.warn("Using insecure connection to ${conn.host}:${conn.port}")
97+
it.usePlaintext()
98+
}
9099
}
91-
}
100+
}
101+
102+
this.clientValue = ReactorBlockchainGrpc.newReactorStub(channel.build())
103+
return this.clientValue!!
92104
}
93105

94-
val client = ReactorBlockchainGrpc.newReactorStub(channel.build())
95-
this.client = client
106+
fun subscribeUpstreamChanges(): Flux<UpstreamChange> {
107+
val connect = {
108+
Flux.interval(Duration.ZERO, Duration.ofMinutes(1))
109+
.flatMap { client.describe(BlockchainOuterClass.DescribeRequest.newBuilder().build()) }
110+
.transform(catchIOError())
111+
.flatMap(::processDescription)
112+
.doOnError { t -> log.error("Failed to process update from gRPC upstream $id", t) }
113+
}
96114

97-
val statusSubscription = AtomicReference<Disposable>()
115+
return DurableFlux(
116+
connect,
117+
ExponentialBackOff(100L, 1.5),
118+
log,
119+
AtomicBoolean(true)
120+
).connect()
121+
}
98122

99-
val updates = Flux.interval(Duration.ZERO, Duration.ofMinutes(1))
100-
.flatMap {
101-
client.describe(BlockchainOuterClass.DescribeRequest.newBuilder().build())
102-
}.onErrorContinue { t, _ ->
123+
fun startStatusUpdates(): Disposable {
124+
val connection = DurableFlux(
125+
{
126+
client
127+
.subscribeStatus(BlockchainOuterClass.StatusRequest.newBuilder().build())
128+
.transform(catchIOError())
129+
},
130+
ExponentialBackOff(100L, 1.5),
131+
log,
132+
AtomicBoolean(true)
133+
)
134+
return connection
135+
.connect()
136+
.subscribeOn(Schedulers.boundedElastic())
137+
.subscribe { value ->
138+
val chain = Chain.byId(value.chain.number)
139+
known[chain]?.onStatus(value)
140+
}
141+
}
142+
143+
fun <T> catchIOError(): java.util.function.Function<Flux<T>, Flux<T>> {
144+
return java.util.function.Function<Flux<T>, Flux<T>> { source ->
145+
source.onErrorContinue { t, _ ->
103146
if (ExceptionUtils.indexOfType(t, ConnectException::class.java) >= 0) {
104147
log.warn("gRPC upstream ${conn.host}:${conn.port} is unavailable. (${t.javaClass}: ${t.message})")
105148
known.values.forEach {
@@ -108,25 +151,8 @@ class GrpcUpstreams(
108151
} else {
109152
log.error("Failed to get description from ${conn.host}:${conn.port}", t)
110153
}
111-
}.flatMap { value ->
112-
processDescription(value)
113-
}.doOnNext {
114-
val subscription = client.subscribeStatus(BlockchainOuterClass.StatusRequest.newBuilder().build())
115-
.subscribe { value ->
116-
val chain = Chain.byId(value.chain.number)
117-
if (chain != Chain.UNSPECIFIED) {
118-
known[chain]?.onStatus(value)
119-
}
120-
}
121-
statusSubscription.updateAndGet { prev ->
122-
prev?.dispose()
123-
subscription
124-
}
125-
}.doOnError { t ->
126-
log.error("Failed to process update from gRPC upstream $id", t)
127154
}
128-
129-
return updates
155+
}
130156
}
131157

132158
fun processDescription(value: BlockchainOuterClass.DescribeResponse): Flux<UpstreamChange> {
@@ -180,29 +206,31 @@ class GrpcUpstreams(
180206
}
181207

182208
fun getOrCreate(chain: Chain): UpstreamChange {
183-
val metricsTags = listOf(
184-
Tag.of("upstream", id),
185-
Tag.of("chain", chain.chainCode)
186-
)
209+
val metrics = Supplier {
210+
val metricsTags = listOf(
211+
Tag.of("upstream", id),
212+
Tag.of("chain", chain.chainCode)
213+
)
187214

188-
val metrics = RpcMetrics(
189-
metricsTags,
190-
timer = Timer.builder("upstream.grpc.conn")
191-
.description("Request time through a Dshackle/gRPC connection")
192-
.tags(metricsTags)
193-
.publishPercentileHistogram()
194-
.register(Metrics.globalRegistry),
195-
fails = Counter.builder("upstream.grpc.fail")
196-
.description("Number of failures of Dshackle/gRPC requests")
197-
.tags(metricsTags)
198-
.register(Metrics.globalRegistry),
199-
responseSize = DistributionSummary.builder("upstream.grpc.response.size")
200-
.description("Size of Dshackle/gRPC responses")
201-
.baseUnit("Bytes")
202-
.tags(metricsTags)
203-
.register(Metrics.globalRegistry),
204-
connectionMetrics = ConnectionMetrics(metricsTags)
205-
)
215+
RpcMetrics(
216+
metricsTags,
217+
timer = Timer.builder("upstream.grpc.conn")
218+
.description("Request time through a Dshackle/gRPC connection")
219+
.tags(metricsTags)
220+
.publishPercentileHistogram()
221+
.register(Metrics.globalRegistry),
222+
fails = Counter.builder("upstream.grpc.fail")
223+
.description("Number of failures of Dshackle/gRPC requests")
224+
.tags(metricsTags)
225+
.register(Metrics.globalRegistry),
226+
responseSize = DistributionSummary.builder("upstream.grpc.response.size")
227+
.description("Size of Dshackle/gRPC responses")
228+
.baseUnit("Bytes")
229+
.tags(metricsTags)
230+
.register(Metrics.globalRegistry),
231+
connectionMetrics = ConnectionMetrics(metricsTags)
232+
)
233+
}
206234

207235
val blockchainType = BlockchainType.from(chain)
208236
if (blockchainType == BlockchainType.ETHEREUM) {
@@ -214,14 +242,14 @@ class GrpcUpstreams(
214242
}
215243
}
216244

217-
fun getOrCreateEthereum(chain: Chain, metrics: RpcMetrics): UpstreamChange {
245+
fun getOrCreateEthereum(chain: Chain, metrics: Supplier<RpcMetrics>): UpstreamChange {
218246
lock.withLock {
219247
val current = known[chain]
220248
return if (current == null) {
221-
val rpcClient = JsonRpcGrpcClient(client!!, chain, metrics) {
249+
val rpcClient = JsonRpcGrpcClient(client, chain, metrics.get()) {
222250
currentRequestLogWriter.wrap(it, id, Channel.DSHACKLE)
223251
}
224-
val created = EthereumGrpcUpstream(id, forkWatchFactory.create(chain), role, chain, this.options, client!!, rpcClient)
252+
val created = EthereumGrpcUpstream(id, forkWatchFactory.create(chain), role, chain, this.options, client, rpcClient)
225253
created.timeout = this.options.timeout
226254
known[chain] = created
227255
created.start()
@@ -232,14 +260,14 @@ class GrpcUpstreams(
232260
}
233261
}
234262

235-
fun getOrCreateBitcoin(chain: Chain, metrics: RpcMetrics): UpstreamChange {
263+
fun getOrCreateBitcoin(chain: Chain, metrics: Supplier<RpcMetrics>): UpstreamChange {
236264
lock.withLock {
237265
val current = known[chain]
238266
return if (current == null) {
239-
val rpcClient = JsonRpcGrpcClient(client!!, chain, metrics) {
267+
val rpcClient = JsonRpcGrpcClient(client, chain, metrics.get()) {
240268
currentRequestLogWriter.wrap(it, id, Channel.DSHACKLE)
241269
}
242-
val created = BitcoinGrpcUpstream(id, forkWatchFactory.create(chain), role, chain, this.options, client!!, rpcClient)
270+
val created = BitcoinGrpcUpstream(id, forkWatchFactory.create(chain), role, chain, this.options, client, rpcClient)
243271
created.timeout = this.options.timeout
244272
known[chain] = created
245273
created.start()

0 commit comments

Comments
 (0)