15
15
16
16
from __future__ import annotations
17
17
18
- import hashlib
18
+ import binascii
19
19
import logging
20
20
import posixpath
21
21
import threading
22
22
from concurrent .futures import Future , ThreadPoolExecutor
23
23
from contextlib import ExitStack
24
24
from dataclasses import asdict , dataclass
25
- from functools import partial
25
+ from functools import lru_cache , partial
26
26
from os import environ
27
27
from time import time
28
28
from typing import Any , Callable , Final , Literal
@@ -159,15 +159,15 @@ def _calculate_ref_path(
159
159
# TODO: experiment with using the trace_id and span_id, or fetching
160
160
# gen_ai.response.id from the active span.
161
161
system_instruction_hash = None
162
- # Use an md5 hash of the system instructions as a filename, when system instructions are text.
163
162
if all (isinstance (x , types .Text ) for x in system_instruction ):
164
- md5_hash = hashlib .md5 ()
165
- md5_hash .update (
166
- "\n " .join (x .content for x in system_instruction ).encode ( # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue, reportUnknownArgumentType]
167
- "utf-8"
163
+ # Get a checksum of the text.
164
+ system_instruction_hash = hex (
165
+ binascii .crc32 (
166
+ "\n " .join (x .content for x in system_instruction ).encode ( # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue, reportUnknownArgumentType]
167
+ "utf-8"
168
+ )
168
169
)
169
170
)
170
- system_instruction_hash = md5_hash .hexdigest ()
171
171
uuid_str = str (uuid4 ())
172
172
return CompletionRefs (
173
173
inputs_ref = posixpath .join (
@@ -182,12 +182,17 @@ def _calculate_ref_path(
182
182
),
183
183
)
184
184
185
def _file_exists(self, path: str) -> bool:
    """Return whether ``path`` exists on ``self._fs``, remembering positive answers.

    Only "exists" results are remembered. A "does not exist" answer is never
    cached, because the caller reacts to it by uploading the file; a cached
    negative would be stale right after that upload and would cause the same
    file to be uploaded again on every later call.

    The memo is stored on the instance (not in a module-level
    ``functools.lru_cache``), so it does not keep instances alive and does
    not require ``self`` to be hashable.

    Args:
        path: Path to look up on the underlying filesystem.

    Returns:
        True if the path is known to exist or the filesystem reports that it
        exists, False otherwise.
    """
    # Upper bound on remembered paths; same size the previous lru_cache used.
    max_remembered = 512

    # Created lazily so this method does not depend on __init__ changes.
    # NOTE(review): assumes instances accept new attributes (no __slots__) - confirm.
    known_paths = getattr(self, "_known_existing_paths", None)
    if known_paths is None:
        known_paths = set()
        self._known_existing_paths = known_paths

    if path in known_paths:
        return True

    # https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.exists
    if not self._fs.exists(path):  # pyright: ignore[reportUnknownMemberType]
        return False

    # Keep the memo bounded. Dropping everything is crude, but a forgotten
    # entry only costs one extra exists() call later.
    if len(known_paths) >= max_remembered:
        known_paths.clear()
    known_paths.add(path)
    return True
189
+
185
190
def _do_upload (
186
191
self , path : str , json_encodeable : Callable [[], JsonEncodeable ]
187
192
) -> None :
188
- # FileSystem class has this method. Only check for system instructions as that's the only where the filename is a hash.
189
- # https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.exists
190
- if "_system_instruction" in path and self ._fs . exists (path ): # pyright: ignore[reportUnknownMemberType]
193
+ # Only check for system instruction file existence as that's the only file where the filename is a hash
194
+ # of the content.
195
+ if "_system_instruction" in path and self ._file_exists (path ):
191
196
return
192
197
if self ._format == "json" :
193
198
# output as a single line with the json messages array
0 commit comments