Skip to content

Commit f2d1ce1

Browse files
authored
Adjustment when calculating hash | Adjustment of the hash calculation… (#1837)
… method When trying to load the saved models after adaptation, alerts like these were always triggered: "Loaded prompt hash does not match the saved hash." "Loaded prompt hash does not match the saved hash." Furthermore, in Python, the default hash() function may yield different results for the same string across different sessions. To achieve consistent hash values, I am therefore using the hashlib module to calculate the hash of the prompt, which provides stable hashing algorithms.
1 parent 433d84f commit f2d1ce1

File tree

1 file changed

+23
-20
lines changed

1 file changed

+23
-20
lines changed

src/ragas/prompt/pydantic_prompt.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import json
55
import logging
66
import os
7+
import hashlib
8+
79
import typing as t
810

911
from langchain_core.exceptions import OutputParserException
@@ -226,12 +228,7 @@ async def adapt(
226228
"""
227229
Adapt the prompt to a new language.
228230
"""
229-
230-
# set the original hash, this is used to
231-
# identify the original prompt object when loading from file
232-
if self.original_hash is None:
233-
self.original_hash = hash(self)
234-
231+
235232
strings = get_all_strings(self.examples)
236233
translated_strings = await translate_statements_prompt.generate(
237234
llm=llm,
@@ -257,6 +254,8 @@ async def adapt(
257254
)
258255
new_prompt.instruction = translated_instruction.statements[0]
259256

257+
new_prompt.original_hash = hash(new_prompt)
258+
260259
return new_prompt
261260

262261
def __repr__(self):
@@ -276,7 +275,7 @@ def __str__(self):
276275
ensure_ascii=False,
277276
)[1:-1]
278277
return f"{self.__class__.__name__}({json_str})"
279-
278+
280279
def __hash__(self):
281280
# convert examples to json string for hashing
282281
examples = []
@@ -285,19 +284,23 @@ def __hash__(self):
285284
examples.append(
286285
(input_model.model_dump_json(), output_model.model_dump_json())
287286
)
288-
289-
# not sure if input_model and output_model should be included
290-
return hash(
291-
(
292-
self.name,
293-
self.input_model,
294-
self.output_model,
295-
self.instruction,
296-
*examples,
297-
self.language,
298-
)
299-
)
300-
287+
288+
# create a SHA-256 hash object
289+
hasher = hashlib.sha256()
290+
291+
# update the hash object with the bytes of each attribute
292+
hasher.update(self.name.encode('utf-8'))
293+
hasher.update(self.input_model.__name__.encode('utf-8'))
294+
hasher.update(self.output_model.__name__.encode('utf-8'))
295+
hasher.update(self.instruction.encode('utf-8'))
296+
for example in examples:
297+
hasher.update(example[0].encode('utf-8'))
298+
hasher.update(example[1].encode('utf-8'))
299+
hasher.update(self.language.encode('utf-8'))
300+
301+
# return the integer value of the hash
302+
return int(hasher.hexdigest(), 16)
303+
301304
def __eq__(self, other):
302305
if not isinstance(other, PydanticPrompt):
303306
return False

0 commit comments

Comments
 (0)