Skip to content

Commit 3403d5b

Browse files
committed
Proper LaTeX path + extension handling
1 parent af2c769 commit 3403d5b

1 file changed

Lines changed: 14 additions & 5 deletions

File tree

latex_clean_fig/clean.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,23 @@
66

77
# Function to extract included images from LaTeX file
88
def extract_included_images(tex_file: str):
9-
with open(tex_file, 'r') as file:
9+
with open(tex_file, 'r', encoding="utf-8", errors="ignore") as file:
1010
content = file.read()
1111

12-
# Regex to match \includegraphics{...} or \includegraphics[...]{...} # TODO: check the other libraries
13-
image_pattern = re.compile(r"\\includegraphics(?:\[.*?\])?\{(.+?)\}")
12+
# Regex to match \includegraphics{...} or \includegraphics[...]{...}
13+
image_pattern = re.compile(
14+
r"\\includegraphics(?:\s*\[.*?\])?\s*\{\s*([^}]+?)\s*\}"
15+
)
16+
17+
images = set()
18+
for match in image_pattern.findall(content):
19+
# Normalize: lowercase + strip directories
20+
basename = os.path.basename(match.strip()).lower()
21+
stem, _ = os.path.splitext(basename)
22+
23+
images.add(basename) # e.g. myplot.pdf
24+
images.add(stem) # e.g. myplot
1425

15-
# Extract all image paths (normalize names to lowercase)
16-
images = set(match.lower() for match in image_pattern.findall(content))
1726
return images
1827

1928
# Function to find and remove unused images

0 commit comments

Comments
 (0)