Skip to content

Commit aba7a47

Browse files
chenrui7 authored and lyingbug committed
fix(parser): resolve chunk index mismatch in logs
1 parent 6a3c29d commit aba7a47

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

docreader/parser/base_parser.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -683,7 +683,7 @@ def extract_images_from_chunk(self, chunk: Chunk) -> List[Dict[str, str]]:
683683
Returns:
684684
List of image information
685685
"""
686-
logger.info(f"Extracting image information from Chunk #{chunk.seq}")
686+
logger.info(f"Extracting image information from Chunk #{chunk.seq + 1}")
687687
text = chunk.content
688688

689689
# Regex to extract image information from text,
@@ -692,7 +692,7 @@ def extract_images_from_chunk(self, chunk: Chunk) -> List[Dict[str, str]]:
692692

693693
# Extract image information
694694
img_matches = list(re.finditer(img_pattern, text))
695-
logger.info(f"Chunk #{chunk.seq} found {len(img_matches)} images")
695+
logger.info(f"Chunk #{chunk.seq + 1} found {len(img_matches)} images")
696696

697697
images_info = []
698698
for match_idx, match in enumerate(img_matches):
@@ -711,9 +711,9 @@ def extract_images_from_chunk(self, chunk: Chunk) -> List[Dict[str, str]]:
711711
images_info.append(image_info)
712712

713713
logger.info(
714-
f"Image in Chunk #{chunk.seq} {match_idx + 1}: URL={img_url[:50]}..."
714+
f"Image in Chunk #{chunk.seq + 1} {match_idx + 1}: URL={img_url[:50]}..."
715715
if len(img_url) > 50
716-
else f"Image in Chunk #{chunk.seq} {match_idx + 1}: URL={img_url}"
716+
else f"Image in Chunk #{chunk.seq + 1} {match_idx + 1}: URL={img_url}"
717717
)
718718

719719
return images_info

0 commit comments

Comments
 (0)