Skip to content

Commit 3a34c1a

Browse files
committed
frontend/latex/output: extract summary to use hook
1 parent 24a61f2 commit 3a34c1a

File tree

2 files changed

+295
-239
lines changed

2 files changed

+295
-239
lines changed

src/packages/frontend/frame-editors/latex-editor/output.tsx

Lines changed: 22 additions & 239 deletions
Original file line numberDiff line numberDiff line change
@@ -49,136 +49,7 @@ import { use_build_logs } from "./hooks";
4949
import { PDFControls } from "./pdf-controls";
5050
import { PDFJS } from "./pdfjs";
5151
import { BuildLogs } from "./types";
52-
53-
const SUMMARIZE_TEX_FILES = `
54-
import sys
55-
import json
56-
import re
57-
import os
58-
59-
def clean_latex_text(text):
60-
"""Remove LaTeX commands and clean up text for readability"""
61-
# Remove comments
62-
text = re.sub(r'%.*$', '', text, flags=re.MULTILINE)
63-
64-
# Remove common LaTeX commands but preserve content
65-
text = re.sub(r'\\\\(title|author|section|subsection|subsubsection|chapter)\\{([^}]*)\\}', r'**\\2**', text)
66-
text = re.sub(r'\\\\(emph|textit)\\{([^}]*)\\}', r'_\\2_', text)
67-
text = re.sub(r'\\\\(textbf|textsc)\\{([^}]*)\\}', r'**\\2**', text)
68-
69-
# Remove other LaTeX commands
70-
text = re.sub(r'\\\\[a-zA-Z]+\\*?\\{[^}]*\\}', '', text)
71-
text = re.sub(r'\\\\[a-zA-Z]+\\*?', '', text)
72-
73-
# Remove LaTeX environments but keep content
74-
text = re.sub(r'\\\\begin\\{[^}]*\\}', '', text)
75-
text = re.sub(r'\\\\end\\{[^}]*\\}', '', text)
76-
77-
# Remove excessive whitespace
78-
text = re.sub(r'\\n\\s*\\n', '\\n', text)
79-
text = re.sub(r'\\s+', ' ', text).strip()
80-
81-
return text
82-
83-
def extract_summary(filepath, home_dir):
84-
"""Extract a meaningful summary from a LaTeX file"""
85-
if not filepath.endswith(('.tex', '.latex')):
86-
return "Non-LaTeX file"
87-
88-
# Handle different path formats
89-
if filepath.startswith('~/'):
90-
# Path starts with ~/ - replace ~ with home directory
91-
expanded_path = os.path.join(home_dir, filepath[2:])
92-
elif os.path.isabs(filepath):
93-
# Absolute path - use as is
94-
expanded_path = filepath
95-
else:
96-
# Relative path - join with home directory
97-
expanded_path = os.path.join(home_dir, filepath)
98-
99-
if not os.path.exists(expanded_path):
100-
return f"File not found: {expanded_path}"
101-
102-
try:
103-
with open(expanded_path, 'r', encoding='utf-8', errors='ignore') as f:
104-
content = f.read()
105-
except Exception as e:
106-
return f"Error reading file: {str(e)}"
107-
108-
# Extract first meaningful content (skip documentclass, packages, etc.)
109-
lines = content.split('\\n')
110-
useful_lines = []
111-
in_preamble = True
112-
has_document_env = '\\\\begin{document}' in content
113-
114-
for line in lines:
115-
line = line.strip()
116-
if not line or line.startswith('%'):
117-
continue
118-
119-
# Check if we're past the preamble
120-
if '\\\\begin{document}' in line:
121-
in_preamble = False
122-
continue
123-
124-
# For files without \\begin{document}, treat everything as content
125-
if not has_document_env:
126-
in_preamble = False
127-
128-
if in_preamble:
129-
# Extract title, author from preamble
130-
if line.startswith('\\\\title{') or line.startswith('\\\\author{'):
131-
useful_lines.append(line)
132-
else:
133-
# Extract meaningful content
134-
if any(cmd in line for cmd in ['\\\\section', '\\\\subsection', '\\\\chapter', '\\\\subsubsection']):
135-
useful_lines.append(line)
136-
elif line and not line.startswith('\\\\') and len(line) > 3: # Lowered threshold
137-
useful_lines.append(line)
138-
elif line.startswith('\\\\') and len(line) > 10: # Include some LaTeX commands
139-
useful_lines.append(line)
140-
141-
# Limit to first 15 useful lines
142-
if len(useful_lines) >= 15:
143-
break
144-
145-
# If we found some useful content, use it
146-
if useful_lines:
147-
summary_text = '\\n'.join(useful_lines[:8]) # Use more lines
148-
cleaned = clean_latex_text(summary_text)
149-
if cleaned and len(cleaned.strip()) > 0:
150-
# Convert to single line and truncate if too long
151-
cleaned = ' '.join(cleaned.split()) # Remove all newlines and extra spaces
152-
if len(cleaned) > 200:
153-
cleaned = cleaned[:197] + "..."
154-
return cleaned
155-
156-
# Fallback: show raw content (first 200 chars, cleaned)
157-
# Remove comments first
158-
raw_content = re.sub(r'%.*$', '', content, flags=re.MULTILINE)
159-
raw_content = ' '.join(raw_content.split()) # Convert to single line
160-
161-
if len(raw_content) > 200:
162-
raw_content = raw_content[:197] + "..."
163-
164-
return raw_content if raw_content else "LaTeX document"
165-
166-
def main():
167-
if len(sys.argv) < 3:
168-
print(json.dumps({"error": "Usage: script.py <home_dir> <file1> <file2> ..."}))
169-
return
170-
171-
home_dir = sys.argv[1]
172-
results = {}
173-
174-
for filepath in sys.argv[2:]:
175-
results[filepath] = extract_summary(filepath, home_dir)
176-
177-
print(json.dumps(results, ensure_ascii=False))
178-
179-
if __name__ == "__main__":
180-
main()
181-
`;
52+
import { useFileSummaries } from "./summarize-tex";
18253

18354
interface OutputProps {
18455
id: string;
@@ -199,6 +70,7 @@ type TabType = "pdf" | "contents" | "files" | "build" | "errors";
19970

20071
interface FileListItem {
20172
path: string;
73+
displayPath: string;
20274
isMain: boolean;
20375
summary: string;
20476
}
@@ -265,16 +137,13 @@ export function Output(props: OutputProps) {
265137
// List of LaTeX files in the project
266138
const switch_to_files: List<string> = useRedux([name, "switch_to_files"]);
267139

268-
// File summaries state with caching (1 minute max)
269-
const [fileSummaries, setFileSummaries] = useState<Record<string, string>>(
270-
{},
271-
);
272-
const [lastSummariesFetch, setLastSummariesFetch] = useState<number>(0);
273-
const [summariesLoading, setSummariesLoading] = useState<boolean>(false);
274-
275140
// Home directory - computed once since it never changes
276141
const [homeDir, setHomeDir] = useState<string | null>(null);
277142

143+
// File summaries using the custom hook
144+
const { fileSummaries, summariesLoading, refreshSummaries } =
145+
useFileSummaries(switch_to_files, project_id, path, homeDir, reload);
146+
278147
// Fetch home directory once when component mounts or project_id changes
279148
React.useEffect(() => {
280149
const fetchHomeDir = async () => {
@@ -403,102 +272,6 @@ export function Output(props: OutputProps) {
403272
return { errors, warnings, typesetting };
404273
}, [build_logs, knitr]);
405274

406-
// Function to generate file summaries using Python script
407-
const generateFileSummaries = useCallback(
408-
async (forceRefresh: boolean = false) => {
409-
if (!switch_to_files || switch_to_files.size === 0) return;
410-
411-
const now = Date.now();
412-
const oneMinute = 60 * 1000;
413-
414-
// Only update if it's been more than 1 minute since last fetch (unless forced)
415-
if (!forceRefresh && now - lastSummariesFetch < oneMinute) return;
416-
417-
setSummariesLoading(true);
418-
419-
try {
420-
// Execute Python script with file list as arguments
421-
const fileList = switch_to_files.toJS();
422-
423-
// Write Python script to temporary file to avoid command line escaping issues
424-
const scriptPath = "/tmp/tex_summarizer.py";
425-
await exec({
426-
command: `cat > "${scriptPath}" << 'EOFPYTHON'\n${SUMMARIZE_TEX_FILES}\nEOFPYTHON`,
427-
project_id,
428-
path: path_split(path).head,
429-
timeout: 5,
430-
});
431-
432-
// Use the pre-fetched home directory
433-
if (!homeDir) {
434-
console.warn("Home directory not available yet");
435-
return;
436-
}
437-
438-
// The switch_to_files contains canonical paths relative to the project root
439-
// Pass the actual home directory to the Python script
440-
const result = await exec({
441-
command: "python3",
442-
args: [scriptPath, homeDir, ...fileList],
443-
project_id,
444-
path: path_split(path).head, // Run from current file's directory
445-
timeout: 30, // 30 second timeout
446-
});
447-
448-
if (result.exit_code === 0 && result.stdout) {
449-
try {
450-
const summaries = JSON.parse(result.stdout);
451-
setFileSummaries(summaries);
452-
} catch (parseError) {
453-
console.warn("Failed to parse summary results:", parseError);
454-
// Fallback to basic summaries
455-
const fallbackSummaries: Record<string, string> = {};
456-
switch_to_files.forEach((filePath) => {
457-
fallbackSummaries[filePath] = "LaTeX document";
458-
});
459-
setFileSummaries(fallbackSummaries);
460-
}
461-
} else {
462-
console.warn(
463-
"Summary generation failed:",
464-
result.stderr ?? "Unknown error",
465-
);
466-
// Fallback to basic summaries
467-
const fallbackSummaries: Record<string, string> = {};
468-
switch_to_files.forEach((filePath) => {
469-
fallbackSummaries[filePath] = "LaTeX document";
470-
});
471-
setFileSummaries(fallbackSummaries);
472-
}
473-
} catch (error) {
474-
console.warn("Error generating summaries:", error);
475-
// Fallback to basic summaries
476-
const fallbackSummaries: Record<string, string> = {};
477-
switch_to_files.forEach((filePath) => {
478-
fallbackSummaries[filePath] = "LaTeX document";
479-
});
480-
setFileSummaries(fallbackSummaries);
481-
} finally {
482-
setLastSummariesFetch(now);
483-
setSummariesLoading(false);
484-
}
485-
},
486-
[switch_to_files, lastSummariesFetch, reload],
487-
);
488-
489-
// Manual refresh function that bypasses the rate limiting
490-
const refreshSummaries = useCallback(
491-
() => generateFileSummaries(true),
492-
[generateFileSummaries],
493-
);
494-
495-
// Generate file summaries when files change
496-
React.useEffect(() => {
497-
if (switch_to_files && switch_to_files.size > 1) {
498-
generateFileSummaries();
499-
}
500-
}, [switch_to_files, generateFileSummaries]);
501-
502275
// No automatic tab switching - let user control tabs manually
503276
// Errors are indicated with red exclamation icon only
504277

@@ -590,11 +363,21 @@ export function Output(props: OutputProps) {
590363
.sort();
591364
const subFileCount = subFiles.size;
592365

593-
const listData = subFiles.toJS().map((filePath: string) => ({
594-
path: filePath,
595-
isMain: false,
596-
summary: fileSummaries[filePath] ?? "Loading...",
597-
}));
366+
// Compute the common prefix to strip (directory of main file)
367+
const prefix = path_split(path).head;
368+
const prefixWithSlash = prefix ? prefix + "/" : "";
369+
370+
const listData = subFiles.toJS().map((filePath: string) => {
371+
const displayPath = filePath.startsWith(prefixWithSlash)
372+
? filePath.slice(prefixWithSlash.length)
373+
: filePath;
374+
return {
375+
path: filePath,
376+
displayPath,
377+
isMain: false,
378+
summary: fileSummaries[filePath] ?? "Loading...",
379+
};
380+
});
598381

599382
return {
600383
key: "files",
@@ -686,7 +469,7 @@ export function Output(props: OutputProps) {
686469
fontSize: `${uiFontSize}px`,
687470
}}
688471
>
689-
{item.path}
472+
{item.displayPath}
690473
</span>
691474
}
692475
description={

0 commit comments

Comments
 (0)