Skip to content

Commit b41a858

Browse files
committed
Add table extraction to Makefile
1 parent badc589 commit b41a858

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

Makefile

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,19 +3,25 @@ ANNOTATIONS_DIR = $(DATA_DIR)/annotations
33
ARCHIVES_DIR = $(DATA_DIR)/arxiv/sources
44
UNPACKED_DIR = $(DATA_DIR)/arxiv/unpacked_sources
55
HTMLS_DIR = $(DATA_DIR)/arxiv/htmls
6+
TABLES_DIR = $(DATA_DIR)/arxiv/tables
67

78
ARCHIVES = $(wildcard $(ARCHIVES_DIR)/*)
89
UNPACKS = $(patsubst $(ARCHIVES_DIR)/%,$(UNPACKED_DIR)/%,$(ARCHIVES))
910
HTMLS = $(patsubst $(ARCHIVES_DIR)/%,$(HTMLS_DIR)/%.html,$(ARCHIVES))
11+
TABLES = $(patsubst $(ARCHIVES_DIR)/%,$(TABLES_DIR)/%,$(ARCHIVES))
1012

11-
$(shell mkdir -p "$(DATA_DIR)")
12-
$(shell mkdir -p "$(HTMLS_DIR)")
13+
$(shell mkdir -p "$(DATA_DIR)" "$(HTMLS_DIR)" "$(TABLES_DIR)")
1314

1415
.PHONY: all
15-
all: $(ANNOTATIONS_DIR)/pdfs-urls.csv $(ANNOTATIONS_DIR)/sources-urls.csv convert_all
16+
all: $(ANNOTATIONS_DIR)/pdfs-urls.csv $(ANNOTATIONS_DIR)/sources-urls.csv extract_all
17+
18+
extract_all: $(TABLES)
1619

1720
convert_all: $(HTMLS)
1821

22+
$(TABLES): $(TABLES_DIR)/%: $(HTMLS_DIR)/%.html
23+
python ./extract_tables.py $^ --outdir $(TABLES_DIR)
24+
1925
$(HTMLS): $(HTMLS_DIR)/%.html: $(UNPACKED_DIR)/%
2026
./docker-latex2html.sh $(HTMLS_DIR) $^
2127

0 commit comments

Comments
 (0)