|
2 | 2 | import hashlib
|
3 | 3 | import json
|
4 | 4 | import re
|
| 5 | +from datetime import timedelta |
5 | 6 | from pathlib import Path
|
6 | 7 | from typing import List, Optional
|
7 | 8 |
|
@@ -200,20 +201,23 @@ def _extract_request_message(self, request: str) -> Optional[dict]:
|
200 | 201 |
|
201 | 202 | def _create_hash_key(self, message: str, provider: str) -> str:
|
202 | 203 | """Creates a hash key from the message and includes the provider"""
|
203 |
| - # Try to extract the path from the message. Most of the times is at the top of the message. |
204 |
| - # The pattern was generated using ChatGPT. Should match common occurrences like: |
| 204 | + # Try to extract the path from the FIM message. The path appears in FIM requests in these formats: |
205 | 205 | # folder/testing_file.py
|
206 | 206 | # Path: file3.py
|
207 |
| - pattern = r"(?:[a-zA-Z]:\\|\/)?(?:[^\s\/]+\/)*[^\s\/]+\.[^\s\/]+" |
208 |
| - match = re.search(pattern, message) |
209 |
| - # Copilot it's the only provider that has an easy path to extract. |
210 |
| - # Other providers are harder to extact. This part needs to be revisited for the moment |
211 |
| - # hasing the entire request message. |
212 |
| - if match is None or provider != "copilot": |
213 |
| - logger.warning("No path found in message or not copilot. Creating hash from message.") |
| 207 | + pattern = r"^#.*?\b([a-zA-Z0-9_\-\/]+\.\w+)\b" |
| 208 | + matches = re.findall(pattern, message, re.MULTILINE) |
| 209 | + # If no path is found, hash the entire prompt message. |
| 210 | + if not matches: |
| 211 | + logger.warning("No path found in messages. Creating hash cache from message.") |
214 | 212 | message_to_hash = f"{message}-{provider}"
|
215 | 213 | else:
|
216 |
| - message_to_hash = f"{match.group(0)}-{provider}" |
| 214 | + # Copilot puts the path at the top of the file. Continue providers contain
| 215 | + # several paths; the one in which the FIM is triggered is the last one.
| 216 | + if provider == "copilot": |
| 217 | + filepath = matches[0] |
| 218 | + else: |
| 219 | + filepath = matches[-1] |
| 220 | + message_to_hash = f"{filepath}-{provider}" |
217 | 221 |
|
218 | 222 | logger.debug(f"Message to hash: {message_to_hash}")
|
219 | 223 | hashed_content = hashlib.sha256(message_to_hash.encode("utf-8")).hexdigest()
|
@@ -247,7 +251,10 @@ def _should_record_context(self, context: Optional[PipelineContext]) -> bool:
|
247 | 251 |
|
248 | 252 | elapsed_seconds = (context.input_request.timestamp - old_timestamp).total_seconds()
|
249 | 253 | if elapsed_seconds < Config.get_config().max_fim_hash_lifetime:
|
250 |
| - logger.info(f"Skipping context recording. Elapsed time: {elapsed_seconds} seconds.") |
| 254 | + logger.info( |
| 255 | + f"Skipping DB context recording. " |
| 256 | + f"Elapsed time since last FIM cache: {timedelta(seconds=elapsed_seconds)}." |
| 257 | + ) |
251 | 258 | return False
|
252 | 259 |
|
253 | 260 | async def record_context(self, context: Optional[PipelineContext]) -> None:
|
|
0 commit comments