55import asyncio
66import concurrent .futures
77import logging
8+ import threading
89import typing as t
910from contextlib import asynccontextmanager , suppress
1011from datetime import datetime , timezone
11- from queue import Empty , Queue
1212
1313import aiofiles
1414import dagster as dg
2222
def run_beat_loop(
    interval_seconds: int,
    stop: threading.Event,
    beat_loop_func: BeatLoopFunc,
    beat_loop_kwargs: dict[str, t.Any],
) -> None:
    """Call ``beat_loop_func`` repeatedly until ``stop`` is set.

    Each beat is executed with ``asyncio.run`` so the function can be called
    from a plain (non-async) thread. A beat is sent immediately on entry and
    then once every ``interval_seconds`` seconds.

    Args:
        interval_seconds: Pause between two consecutive beats.
        stop: Event that, once set, ends the loop after the current beat.
        beat_loop_func: Coroutine function that sends a single heartbeat.
        beat_loop_kwargs: Keyword arguments forwarded to ``beat_loop_func``.
    """
    logger.info("Starting heartbeat beat loop")
    while True:
        logger.info("Running heartbeat beat loop function")
        try:
            asyncio.run(beat_loop_func(**beat_loop_kwargs))
        except Exception:
            # One failed beat must not end the loop: keep trying on the next
            # interval instead of silently stopping all future heartbeats.
            logger.exception("Heartbeat beat loop function failed")
        # Event.wait doubles as an interruptible sleep: it returns True as
        # soon as ``stop`` is set and False when the timeout elapses.
        if stop.wait(timeout=float(interval_seconds)):
            logger.info("Stopping heartbeat beat loop")
            break
3936
4037
class HeartBeatResource(dg.ConfigurableResource):
    """Base resource for emitting and reading named heartbeats.

    Subclasses provide the storage-specific pieces: ``beat_loop_func`` /
    ``beat_loop_kwargs`` for the background beat loop, plus
    ``get_last_heartbeat_for`` and ``beat`` for direct access.
    """

    def beat_loop_func(self) -> BeatLoopFunc:
        """Return the function to be called in the heartbeat loop"""
        raise NotImplementedError()

    def beat_loop_kwargs(self) -> dict[str, t.Any]:
        """Return the kwargs to be passed to the heartbeat loop function"""
        return {}

    async def get_last_heartbeat_for(self, name: str) -> datetime | None:
        """Return the last heartbeat recorded for ``name``, or None if absent."""
        raise NotImplementedError()

    async def beat(self, name: str) -> None:
        """Record a single heartbeat for ``name``."""
        raise NotImplementedError()

    @asynccontextmanager
    async def heartbeat(
        self,
        name: str,
        interval_seconds: int = 120,
        log_override: logging.Logger | None = None,
    ) -> t.AsyncIterator[None]:
        """Keep a heartbeat for ``name`` alive while the context is active.

        Args:
            name: Heartbeat identifier, passed to the beat loop function as
                the ``heartbeat_name`` keyword argument.
            interval_seconds: Pause between two consecutive beats.
            log_override: Logger used to report beat loop failures; the
                module logger is used when omitted.
        """
        log = log_override or logger
        loop = asyncio.get_running_loop()
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            kwargs = {**self.beat_loop_kwargs(), "heartbeat_name": name}
            stop = threading.Event()

            # The beat loop must run in a separate thread because dagster's
            # async event loop tends to block despite being async. Using a
            # thread ensures that the heartbeat will run assuming the process
            # is alive and not completely blocked.
            #
            # To make this work we must pass all of the context the beat loop
            # function needs, as the function running in a separate thread
            # might not have access to the same context. This also means the
            # loop could be moved to a separate process in the future if
            # needed, and it is why the beat loop functions are not methods of
            # the resource class implementations.
            beat_task = loop.run_in_executor(
                executor,
                run_beat_loop,
                interval_seconds,
                stop,
                self.beat_loop_func(),
                kwargs,
            )
            try:
                yield
            finally:
                # Setting the event wakes the worker out of its wait. A
                # running executor job cannot be cancelled, so wait for it to
                # finish here instead of blocking the event loop in the
                # executor's shutdown.
                stop.set()
                try:
                    await beat_task
                except Exception:
                    # Report the failure without masking an exception raised
                    # by the body of the ``async with`` block.
                    log.exception(f"Heartbeat beat loop for `{name}` failed")
@@ -90,10 +102,10 @@ async def async_redis_client(host: str, port: int) -> t.AsyncIterator[Redis]:
90102 await client .aclose ()
91103
92104
async def redis_send_heartbeat(*, host: str, port: int, heartbeat_name: str) -> None:
    """Store the current UTC time under the ``heartbeat:<heartbeat_name>`` key.

    Args:
        host: Redis host to connect to.
        port: Redis port to connect to.
        heartbeat_name: Name of the heartbeat; used as the key suffix.
    """
    async with async_redis_client(host, port) as redis_client:
        # The value is an ISO 8601 timestamp so readers can parse it back
        # with ``datetime.fromisoformat``.
        await redis_client.set(
            f"heartbeat:{heartbeat_name}", datetime.now(timezone.utc).isoformat()
        )
98110
99111
@@ -107,29 +119,29 @@ def beat_loop_func(self) -> BeatLoopFunc:
def beat_loop_kwargs(self) -> dict[str, t.Any]:
    """Return the Redis connection kwargs passed to the heartbeat loop function"""
    return {"host": self.host, "port": self.port}
109121
async def get_last_heartbeat_for(self, name: str) -> datetime | None:
    """Return the timestamp stored under ``heartbeat:<name>``, or None.

    None is returned when the fetched value is neither ``str`` nor ``bytes``.
    """
    async with async_redis_client(self.host, self.port) as redis_client:
        timestamp = await redis_client.get(f"heartbeat:{name}")
    logger.info(f"Fetched heartbeat `{name}`: {timestamp}")
    # The value may arrive as bytes or str; normalise to str before parsing.
    if isinstance(timestamp, bytes):
        timestamp = timestamp.decode("utf-8")
    if isinstance(timestamp, str):
        return datetime.fromisoformat(timestamp)
    return None
120132
async def beat(self, name: str) -> None:
    """Record a single heartbeat for ``name`` in Redis."""
    await redis_send_heartbeat(host=self.host, port=self.port, heartbeat_name=name)
125137
126138
async def filebased_send_heartbeat(*, directory: str, heartbeat_name: str) -> None:
    """Write the current UTC time to ``<directory>/<heartbeat_name>_heartbeat.txt``.

    The file is overwritten on every call and contains a single ISO 8601
    timestamp.

    Args:
        directory: Directory holding the heartbeat files.
        heartbeat_name: Name of the heartbeat; used as the file name prefix.
    """
    # NOTE(review): imports are function-local, presumably so this function
    # stays self-contained when run from the beat loop thread - confirm.
    from pathlib import Path

    import aiofiles

    filepath = Path(directory) / f"{heartbeat_name}_heartbeat.txt"
    async with aiofiles.open(filepath, mode="w") as f:
        await f.write(datetime.now(timezone.utc).isoformat())
135147
@@ -139,29 +151,25 @@ class FilebasedHeartBeatResource(HeartBeatResource):
139151
140152 directory : str = Field (description = "Directory to store heartbeat files." )
141153
async def get_last_heartbeat_for(self, name: str) -> datetime | None:
    """Return the timestamp stored in ``<directory>/<name>_heartbeat.txt``.

    Returns None when the file does not exist or is empty.
    """
    from pathlib import Path

    filepath = Path(self.directory) / f"{name}_heartbeat.txt"
    # Open directly instead of checking ``exists()`` first, so a file that
    # disappears between the check and the open cannot raise.
    try:
        async with aiofiles.open(filepath, mode="r") as f:
            timestamp = (await f.read()).strip()
    except FileNotFoundError:
        return None
    # The writer opens the file with mode "w", which truncates it before the
    # new timestamp is written; a read in that window sees an empty file.
    if not timestamp:
        return None
    return datetime.fromisoformat(timestamp)
151163
async def beat(self, name: str) -> None:
    """Record a single heartbeat for ``name`` in the heartbeat directory."""
    await filebased_send_heartbeat(directory=self.directory, heartbeat_name=name)
160168
161169 @asynccontextmanager
162170 async def heartbeat (
163171 self ,
164- job_name : str ,
172+ name : str ,
165173 interval_seconds : int = 120 ,
166174 log_override : logging .Logger | None = None ,
167175 ) -> t .AsyncIterator [None ]:
@@ -170,14 +178,12 @@ async def heartbeat(
170178 async def beat_loop ():
171179 while True :
172180 try :
173- await self .beat (job_name )
181+ await self .beat (name )
174182 logger_to_use .info (
175- f"Heartbeat sent for job { job_name } at { datetime .now (timezone .utc ).isoformat ()} "
183+ f"Heartbeat sent for job { name } at { datetime .now (timezone .utc ).isoformat ()} "
176184 )
177185 except Exception as e :
178- logger_to_use .error (
179- f"Error sending heartbeat for job { job_name } : { e } "
180- )
186+ logger_to_use .error (f"Error sending heartbeat for job { name } : { e } " )
181187 await asyncio .sleep (interval_seconds )
182188
183189 beat_task = asyncio .create_task (beat_loop ())
0 commit comments