Skip to content

Commit 8385aaf

Browse files
authored
Merge pull request #1 from paperswithcode/latexml-migration
Latexml migration
2 parents 1db8c1f + ff48090 commit 8385aaf

27 files changed

+1272
-186
lines changed

Makefile

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,13 +20,11 @@ all: $(ANNOTATIONS_DIR)/pdfs-urls.csv $(ANNOTATIONS_DIR)/sources-urls.csv extrac
2020

2121
.PHONY: test
2222
test: DATA_DIR = test/data
23-
test: TABLE_FILE = $(TABLES_DIR)/paper/table_01.csv
2423
test:
2524
mkdir -p $(ARCHIVES_DIR)
2625
tar czf $(ARCHIVES_DIR)/paper.gz -C test/src .
2726
$(MAKE) DATA_DIR=$(DATA_DIR) --always-make extract_all
28-
cat $(TABLE_FILE)
29-
diff $(TABLE_FILE) test/src/table_01.csv
27+
diff -r $(TABLES_DIR) test/expected
3028

3129
.PHONY: extract_all extract_texts extract_tables fix_htmls_all convert_all unpack_all
3230

clean_html.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,4 +4,4 @@ SOURCE=$(realpath "$1")
44
mkdir -p $(dirname "$2")
55
OUTPUT=$(realpath "$2")
66

7-
docker run --rm -v "$SOURCE":/files/index.html:ro --entrypoint '' zenika/alpine-chrome:73 chromium-browser --headless --disable-gpu --disable-software-rasterizer --no-sandbox --timeout=30000 --dump-dom /files/index.html > "$OUTPUT"
7+
docker run --rm -v "$SOURCE":/files/index.html:ro --entrypoint '' zenika/alpine-chrome:73 timeout -t 20 -s KILL chromium-browser --headless --disable-gpu --disable-software-rasterizer --no-sandbox --timeout=30000 --dump-dom /files/index.html > "$OUTPUT"

docker-latex2html.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,4 +6,4 @@ OUTPUT=$(realpath "$2") #~/arxiv/htmls/1701/1701.xyz.html
66
OUTPUT_DIR=$(dirname "$OUTPUT") #~/arxiv/htmls/1701
77
FILENAME=$(basename "$OUTPUT") #1701.xyz.html
88

9-
docker run --rm -v $PWD/latex2html.sh:/files/latex2html.sh:ro -v "$SOURCE_DIR":/files/ro-source:ro -v "$OUTPUT_DIR":/files/htmls niccokunzmann/ci-latex /files/latex2html.sh "$FILENAME"
9+
docker run --rm -v $PWD/latex2html.sh:/files/latex2html.sh:ro -v $PWD/guess_main.py:/files/guess_main.py:ro -v $PWD/patches:/files/patches:ro -v "$SOURCE_DIR":/files/ro-source:ro -v "$OUTPUT_DIR":/files/htmls arxivvanity/engrafo /files/latex2html.sh "$FILENAME"

0 commit comments

Comments
 (0)