@@ -101,7 +101,10 @@ def _looks_like_uuid(self, value: str) -> bool:
 
     def fetch_runs(self, project_name: str, limit: int) -> List[Any]:
         """
-        Fetch runs from LangSmith with rate limiting.
+        Fetch runs from LangSmith with pagination support for large exports.
+
+        Due to LangSmith API limitations (max 100 records per call), this method
+        makes multiple API calls to fetch all requested runs.
 
         Args:
             project_name: Name or ID of the LangSmith project
@@ -114,16 +117,130 @@ def fetch_runs(self, project_name: str, limit: int) -> List[Any]:
             ProjectNotFoundError: If project doesn't exist
             RateLimitError: If rate limit exceeded after retries
         """
+        CHUNK_SIZE = 100  # LangSmith API limit per call
+
+        all_runs = []
+        fetched_count = 0
+
+        # Calculate number of pages needed
+        num_pages = (limit + CHUNK_SIZE - 1) // CHUNK_SIZE
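+        # Example: limit=250 with CHUNK_SIZE=100 -> num_pages = 3 (pages of 100, 100, 50)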
+
+        # Only show pagination message if multiple pages needed
+        if num_pages > 1:
+            print(f" 📄 Fetching {limit} runs across {num_pages} pages...")
+
+        for page_num in range(num_pages):
+            # Calculate how many runs to fetch in this page
+            remaining = limit - fetched_count
+            page_size = min(CHUNK_SIZE, remaining)
+
+            # Fetch this page
+            page_runs = self._fetch_page_with_retry(
+                project_name=project_name,
+                limit=page_size,
+                fetched_so_far=fetched_count,
+                page_num=page_num + 1,
+                total_pages=num_pages,
+            )
+
+            # No more runs available
+            if len(page_runs) == 0:
+                if fetched_count == 0:
+                    # No runs at all - will be handled by caller
+                    break
+                else:
+                    # Got some runs but not all requested
+                    print(
+                        f" ℹ️ Reached end of available runs at {fetched_count} (requested {limit})"
+                    )
+                    break
+
+            all_runs.extend(page_runs)
+            fetched_count += len(page_runs)
+
+            # Progress update for multi-page fetches
+            if num_pages > 1:
+                print(
+                    f" ✓ Page {page_num + 1}/{num_pages}: {len(page_runs)} runs (Total: {fetched_count})"
+                )
+
+            # Check if we got fewer than requested - indicates no more runs available
+            if len(page_runs) < page_size:
+                if fetched_count < limit:
+                    print(
+                        f" ℹ️ Only {fetched_count} runs available (requested {limit})"
+                    )
+                break
+
+            # Reached our limit
+            if fetched_count >= limit:
+                break
+
+            # Add small delay between pages (not on last page)
+            if page_num < num_pages - 1 and fetched_count < limit:
+                time.sleep(0.5)  # 500ms delay between pages
+
+        # Final warning if significantly fewer runs than requested
+        if fetched_count < limit:
+            print(f" ⚠️ Warning: Fetched {fetched_count} runs (requested {limit})")
+
+        return all_runs
+
+    def _fetch_page_with_retry(
+        self,
+        project_name: str,
+        limit: int,
+        fetched_so_far: int,
+        page_num: int,
+        total_pages: int,
+    ) -> List[Any]:
+        """
+        Fetch a single page of runs with exponential backoff retry logic.
+
+        This method wraps the SDK's list_runs call with retry logic to handle
+        transient errors and rate limiting.
+
+        Since the LangSmith SDK doesn't support an offset parameter, we request all
+        runs up to our position + page size, then skip to our position using islice.
+
+        Args:
+            project_name: Name or ID of the LangSmith project
+            limit: Number of runs to fetch for this page
+            fetched_so_far: Number of runs already fetched (used for offset simulation)
+            page_num: Current page number (1-indexed, for logging)
+            total_pages: Total number of pages expected (for logging)
+
+        Returns:
+            List of Run objects from this page
+
+        Raises:
+            ProjectNotFoundError: If project doesn't exist
+            RateLimitError: If rate limit exceeded after retries
+        """
+        from itertools import islice
+
         attempt = 0
         last_exception = None
 
         while attempt < self.MAX_RETRIES:
             try:
-                # Try with project_name parameter first
-                runs = list(
-                    self.client.list_runs(project_name=project_name, limit=limit)
+                # Since the LangSmith SDK doesn't support an offset parameter,
+                # we request all runs up to our position + page size,
+                # then skip to our position using islice
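+                # Note: the offset is simulated client-side, so each page re-pulls
+                # the runs already fetched; total records read from the API grow
+                # roughly quadratically with the number of pages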
+                total_to_request = fetched_so_far + limit
+
+                # Try with project_name first
+                runs_iterator = self.client.list_runs(
+                    project_name=project_name, limit=total_to_request
                 )
-                return runs
+
+                # Skip already-fetched runs and take the next page
+                page_runs = list(
+                    islice(runs_iterator, fetched_so_far, fetched_so_far + limit)
+                )
+
+                return page_runs
+
             except Exception as e:
                 last_exception = e
                 error_msg = str(e).lower()
@@ -137,12 +254,17 @@ def fetch_runs(self, project_name: str, limit: int) -> List[Any]:
                 if self._looks_like_uuid(project_name):
                     print("Trying project ID instead of name...")
                     try:
-                        runs = list(
-                            self.client.list_runs(
-                                project_id=project_name, limit=limit
+                        runs_iterator = self.client.list_runs(
+                            project_id=project_name, limit=total_to_request
+                        )
+                        page_runs = list(
+                            islice(
+                                runs_iterator,
+                                fetched_so_far,
+                                fetched_so_far + limit,
                             )
                         )
-                        return runs
+                        return page_runs
                     except Exception:  # nosec B110
                         pass  # Intentional: Fall through to retry logic if project_id also fails
 
@@ -158,15 +280,23 @@ def fetch_runs(self, project_name: str, limit: int) -> List[Any]:
                 attempt += 1
                 if attempt >= self.MAX_RETRIES:
                     break
+
                 # Exponential backoff
                 backoff_time = self.INITIAL_BACKOFF * (
                     self.BACKOFF_MULTIPLIER ** (attempt - 1)
                 )
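+                # Example (assuming INITIAL_BACKOFF=1.0 and BACKOFF_MULTIPLIER=2.0):
+                # waits of 1.0s, 2.0s, 4.0s, ... between successive attempts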
+
+                # Only show retry message for multi-page fetches
+                if total_pages > 1:
+                    print(
+                        f" ⚠️ Page {page_num}/{total_pages} failed (attempt {attempt}/{self.MAX_RETRIES}), retrying in {backoff_time:.1f}s..."
+                    )
+
                 time.sleep(backoff_time)
 
         # If we get here, all retries failed
         raise RateLimitError(
-            f"Failed to fetch runs after {self.MAX_RETRIES} attempts. "
+            f"Failed to fetch page {page_num}/{total_pages} after {self.MAX_RETRIES} attempts. "
             f"Last error: {str(last_exception)}"
         ) from last_exception
 
@@ -423,7 +553,11 @@ def main() -> None:
     try:
         print("📥 Fetching traces...")
         runs = exporter.fetch_runs(project_name=args.project, limit=args.limit)
-        print(f"✓ Fetched {len(runs)} traces")
+        # fetch_runs now provides progress updates, so adjust final message
+        if len(runs) != args.limit:
+            print(f"✓ Fetched {len(runs)} traces (requested {args.limit})")
+        else:
+            print(f"✓ Fetched {len(runs)} traces")
 
         if len(runs) == 0:
             print("⚠️ No traces found in project")