Skip to content

Commit 6ed3043

Browse files
authored
fix: grpc timeout segment data loss (#116)
1 parent 8e9ea9d commit 6ed3043

File tree

4 files changed

+26
-8
lines changed

4 files changed

+26
-8
lines changed

skywalking/agent/__init__.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#
1717

1818
import atexit
19-
from queue import Queue
19+
from queue import Queue, Full
2020
from threading import Thread, Event
2121
from typing import TYPE_CHECKING
2222

@@ -109,8 +109,7 @@ def connected():
109109

110110

111111
def archive(segment: 'Segment'):
112-
if __queue.full():
112+
try: # unlike checking __queue.full() then inserting, this is atomic
113+
__queue.put(segment, block=False)
114+
except Full:
113115
logger.warning('the queue is full, the segment will be abandoned')
114-
return
115-
116-
__queue.put(segment)

skywalking/agent/protocol/grpc.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
import logging
1919
from skywalking.loggings import logger
2020
import traceback
21-
from queue import Queue, Empty
21+
from queue import Queue, Empty, Full
22+
from time import time
2223

2324
import grpc
2425

@@ -68,10 +69,16 @@ def on_error(self):
6869
self.channel.subscribe(self._cb, try_to_connect=True)
6970

7071
def report(self, queue: Queue, block: bool = True):
72+
start = time()
73+
segment = None
74+
7175
def generator():
76+
nonlocal segment
77+
7278
while True:
7379
try:
74-
segment = queue.get(block=block) # type: Segment
80+
timeout = max(0, config.QUEUE_TIMEOUT - int(time() - start)) # type: int
81+
segment = queue.get(block=block, timeout=timeout) # type: Segment
7582
except Empty:
7683
return
7784

@@ -120,5 +127,12 @@ def generator():
120127

121128
try:
122129
self.traces_reporter.report(generator())
130+
123131
except grpc.RpcError:
124132
self.on_error()
133+
134+
if segment:
135+
try:
136+
queue.put(segment, block=False)
137+
except Full:
138+
pass

skywalking/client/grpc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,4 @@ def __init__(self, channel: grpc.Channel):
5555
self.report_stub = TraceSegmentReportServiceStub(channel)
5656

5757
def report(self, generator):
58-
self.report_stub.collect(generator)
58+
self.report_stub.collect(generator, timeout=config.GRPC_TIMEOUT)

skywalking/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@
2323
if TYPE_CHECKING:
2424
from typing import List
2525

26+
# In order to prevent timeouts and possible segment loss, make sure QUEUE_TIMEOUT is always at least a few seconds lower
27+
# than GRPC_TIMEOUT.
28+
GRPC_TIMEOUT = 300 # type: int
29+
QUEUE_TIMEOUT = 240 # type: int
30+
2631
RE_IGNORE_PATH = re.compile('^$') # type: re.Pattern
2732

2833
service_name = os.getenv('SW_AGENT_NAME') or 'Python Service Name' # type: str

0 commit comments

Comments
 (0)