Skip to content

Commit d7a9e30

Browse files
authored
feature: Delayed background rendering (#1)
1 parent 3cbf16b commit d7a9e30

File tree

7 files changed

+120
-73
lines changed

7 files changed

+120
-73
lines changed

pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,9 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
130130
if (doc->getPageRotate(pageno) == 90 || doc->getPageRotate(pageno) == 270)
131131
std::swap(page_height, page_width);
132132

133-
string fn = (char*)html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno);
134-
if(param.embed_image)
135-
html_renderer->tmp_files.add(fn);
133+
auto fn = html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno);
136134

137-
surface = cairo_svg_surface_create(fn.c_str(), page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI);
135+
surface = cairo_svg_surface_create((const char *)fn, page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI);
138136
cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2);
139137
cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi);
140138

@@ -174,15 +172,14 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
174172
{
175173
int n = 0;
176174
char c;
177-
ifstream svgfile(fn);
175+
ifstream svgfile((const char *)fn);
178176
//count of '<' in the file should be an approximation of node count.
179177
while(svgfile >> c)
180178
{
181179
if (c == '<')
182180
++n;
183181
if (n > param.svg_node_count_limit)
184182
{
185-
html_renderer->tmp_files.add(fn);
186183
return false;
187184
}
188185
}
@@ -192,6 +189,9 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
192189
for (auto id : bitmaps_in_current_page)
193190
++bitmaps_ref_count[id];
194191

192+
if(param.embed_image)
193+
html_renderer->tmp_files.add((const char *)fn);
194+
195195
return true;
196196
}
197197

pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc

Lines changed: 42 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -111,66 +111,55 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
111111
(!(param.use_cropbox)),
112112
false, false,
113113
nullptr, nullptr, &annot_cb, &process_annotation);
114+
115+
auto * bitmap = getBitmap();
116+
117+
auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str());
118+
119+
SplashImageFileFormat splashImageFileFormat;
120+
if(format == "png")
121+
splashImageFileFormat = splashFormatPng;
122+
else if(format == "jpg")
123+
splashImageFileFormat = splashFormatJpeg;
124+
else
125+
throw string("Image format not supported: ") + format;
126+
127+
SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi);
128+
if (e != splashOk)
129+
throw string("Cannot write background image. SplashErrorCode: ") + std::to_string(e);
130+
131+
if(param.embed_image)
132+
html_renderer->tmp_files.add((const char *)fn);
133+
114134
return true;
115135
}
116136

117137
void SplashBackgroundRenderer::embed_image(int pageno)
118138
{
119-
auto * bitmap = getBitmap();
120-
// dump the background image only when it is not empty
121-
if(bitmap->getWidth() >= 0 && bitmap->getHeight() >= 0)
139+
auto & f_page = *(html_renderer->f_curpage);
140+
141+
f_page << "<img class=\"" << CSS::FULL_BACKGROUND_IMAGE_CN
142+
<< "\" alt=\"\" src=\"";
143+
144+
if(param.embed_image)
145+
{
146+
auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, format.c_str());
147+
ifstream fin((char*)path, ifstream::binary);
148+
if(!fin)
149+
throw string("Cannot read background image ") + (char*)path;
150+
151+
auto iter = FORMAT_MIME_TYPE_MAP.find(format);
152+
if(iter == FORMAT_MIME_TYPE_MAP.end())
153+
throw string("Image format not supported: ") + format;
154+
155+
string mime_type = iter->second;
156+
f_page << "data:" << mime_type << ";base64," << Base64Stream(fin);
157+
}
158+
else
122159
{
123-
{
124-
auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str());
125-
if(param.embed_image)
126-
html_renderer->tmp_files.add((const char *)fn);
127-
128-
SplashImageFileFormat splashImageFileFormat;
129-
if(format == "png")
130-
splashImageFileFormat = splashFormatPng;
131-
else if(format == "jpg")
132-
splashImageFileFormat = splashFormatJpeg;
133-
else
134-
throw string("Image format not supported: ") + format;
135-
136-
SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi);
137-
if (e != splashOk)
138-
throw string("Cannot write background image. SplashErrorCode: ") + std::to_string(e);
139-
}
140-
141-
double h_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi;
142-
double v_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi;
143-
144-
auto & f_page = *(html_renderer->f_curpage);
145-
auto & all_manager = html_renderer->all_manager;
146-
147-
f_page << "<img class=\"" << CSS::BACKGROUND_IMAGE_CN
148-
<< " " << CSS::LEFT_CN << all_manager.left.install(0.0L)
149-
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(0.0L)
150-
<< " " << CSS::WIDTH_CN << all_manager.width.install(h_scale * bitmap->getWidth())
151-
<< " " << CSS::HEIGHT_CN << all_manager.height.install(v_scale * bitmap->getHeight())
152-
<< "\" alt=\"\" src=\"";
153-
154-
if(param.embed_image)
155-
{
156-
auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, format.c_str());
157-
ifstream fin((char*)path, ifstream::binary);
158-
if(!fin)
159-
throw string("Cannot read background image ") + (char*)path;
160-
161-
auto iter = FORMAT_MIME_TYPE_MAP.find(format);
162-
if(iter == FORMAT_MIME_TYPE_MAP.end())
163-
throw string("Image format not supported: ") + format;
164-
165-
string mime_type = iter->second;
166-
f_page << "data:" << mime_type << ";base64," << Base64Stream(fin);
167-
}
168-
else
169-
{
170-
f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, format.c_str());
171-
}
172-
f_page << "\"/>";
160+
f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, format.c_str());
173161
}
162+
f_page << "\"/>";
174163
}
175164

176165
} // namespace pdf2htmlEX

pdf2htmlEX/src/CoveredTextDetector.cc

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@
1414

1515
namespace pdf2htmlEX {
1616

17-
CoveredTextDetector::CoveredTextDetector(Param & param): param(param)
17+
// Default constructor: builds a detector that is not bound to any Param.
// It exists so the detector can be default-constructed and later overwritten
// by copy-assignment from a bound detector.
// The pointer is set to nullptr explicitly: the member is a raw `Param *`, and
// without an initializer it would hold an indeterminate value.
// NOTE(review): methods that read `param->...` do not check for nullptr, so an
// unbound detector must be assigned from a bound one before it is used.
CoveredTextDetector::CoveredTextDetector(): param(nullptr)
{
}
20+
21+
CoveredTextDetector::CoveredTextDetector(Param & param): param(&param)
1822
{
1923
}
2024

@@ -41,10 +45,10 @@ void CoveredTextDetector::add_char_bbox_clipped(cairo_t *cairo, double * bbox, i
4145
char_pts_visible.push_back(pts_visible);
4246

4347
// DCRH: Hide if no points are visible, or if some points are visible and correct_text_visibility == 2
44-
if (pts_visible == 0 || param.correct_text_visibility == 2) {
48+
if (pts_visible == 0 || param->correct_text_visibility == 2) {
4549
chars_covered.push_back(true);
46-
if (pts_visible > 0 && param.correct_text_visibility == 2) {
47-
param.actual_dpi = std::min(param.text_dpi, param.max_dpi); // Char partially covered so increase background resolution
50+
if (pts_visible > 0 && param->correct_text_visibility == 2) {
51+
param->actual_dpi = std::min(param->text_dpi, param->max_dpi); // Char partially covered so increase background resolution
4852
}
4953
} else {
5054
chars_covered.push_back(false);
@@ -98,13 +102,13 @@ printf("pts_visible=%x\n", pts_visible);
98102
printf("pts_visible=%x\n", pts_visible);
99103
#endif
100104
char_pts_visible[i] = pts_visible;
101-
if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param.correct_text_visibility == 2)) {
105+
if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param->correct_text_visibility == 2)) {
102106
#ifdef DEBUG
103107
printf("Char covered\n");
104108
#endif
105109
chars_covered[i] = true;
106-
if (pts_visible > 0 && param.correct_text_visibility == 2) { // Partially visible text => increase rendering DPI
107-
param.actual_dpi = std::min(param.text_dpi, param.max_dpi);
110+
if (pts_visible > 0 && param->correct_text_visibility == 2) { // Partially visible text => increase rendering DPI
111+
param->actual_dpi = std::min(param->text_dpi, param->max_dpi);
108112
}
109113
}
110114
} else {

pdf2htmlEX/src/CoveredTextDetector.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ namespace pdf2htmlEX {
2121
class CoveredTextDetector
2222
{
2323
public:
24+
CoveredTextDetector();
2425

2526
CoveredTextDetector(Param & param);
2627

@@ -60,7 +61,7 @@ class CoveredTextDetector
6061
// x00, y00, x01, y01; x10, y10, x11, y11;...
6162
std::vector<double> char_bboxes;
6263
std::vector<int> char_pts_visible;
63-
Param & param;
64+
Param * param;
6465
};
6566

6667
}

pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ struct HTMLRenderer : OutputDev
8080

8181
void process(PDFDoc * doc);
8282

83+
bool renderPage(PDFDoc * doc, int pageno);
84+
8385
////////////////////////////////////////////////////
8486
// OutputDev interface
8587
////////////////////////////////////////////////////
@@ -379,6 +381,11 @@ struct HTMLRenderer : OutputDev
379381

380382
CoveredTextDetector covered_text_detector;
381383
DrawingTracer tracer;
384+
385+
struct PageCache {
386+
CoveredTextDetector covered_text_detector;
387+
};
388+
std::unordered_map<int, PageCache> page_cache;
382389
};
383390

384391
} //namespace pdf2htmlEX

pdf2htmlEX/src/HTMLRenderer/general.cc

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,44 @@ void HTMLRenderer::process(PDFDoc *doc)
183183

184184
post_process();
185185

186-
bg_renderer = nullptr;
187-
fallback_bg_renderer = nullptr;
186+
if (param.delay_background == 0)
187+
{
188+
bg_renderer = nullptr;
189+
fallback_bg_renderer = nullptr;
190+
}
188191

189192
if(param.quiet == 0)
190193
cerr << endl;
191194
}
192195

196+
// Renders the background image of one page after the fact, for use when
// param.delay_background is set.
//
// pageno: page whose background should be rendered; it must have an entry in
//         page_cache.
// doc:    NOTE(review): currently unused - rendering goes through cur_doc,
//         exactly as before. Confirm whether callers expect `doc` to be used.
//
// Returns true when either the primary or the fallback background renderer
// reports success. Returns false when delayed rendering is disabled, when no
// background renderer is available, when the page has no cache entry, or when
// both renderers fail.
bool HTMLRenderer::renderPage(PDFDoc *doc, int pageno)
{
    // Delayed rendering is opt-in; without it there is nothing to do here.
    if (param.delay_background == 0)
        return false;

    // The renderer handle can be null (process() resets it to nullptr when
    // delay_background is 0), so check it before dereferencing.
    if (!bg_renderer)
    {
        cerr << "Background renderer is not available for page " << pageno << endl;
        return false;
    }

    // Single lookup: reuse the iterator instead of find() followed by operator[].
    const auto cached = page_cache.find(pageno);
    if (cached == page_cache.end())
    {
        cerr << "Page number " << pageno << " not found in page cache" << endl;
        return false;
    }

    // Restore the detector state saved for this page before rendering.
    covered_text_detector = cached->second.covered_text_detector;

    if (bg_renderer->render_page(cur_doc, pageno))
        return true;

    // The primary renderer declined the page; try the fallback if one exists.
    return fallback_bg_renderer && fallback_bg_renderer->render_page(cur_doc, pageno);
}
223+
193224
void HTMLRenderer::setDefaultCTM(const double *ctm)
194225
{
195226
memcpy(default_ctm, ctm, sizeof(default_ctm));
@@ -243,14 +274,21 @@ void HTMLRenderer::endPage() {
243274

244275
if(param.process_nontext)
245276
{
246-
if (bg_renderer->render_page(cur_doc, pageNum))
277+
if (param.delay_background)
247278
{
248279
bg_renderer->embed_image(pageNum);
249280
}
250-
else if (fallback_bg_renderer)
281+
else
251282
{
252-
if (fallback_bg_renderer->render_page(cur_doc, pageNum))
253-
fallback_bg_renderer->embed_image(pageNum);
283+
if (bg_renderer->render_page(cur_doc, pageNum))
284+
{
285+
bg_renderer->embed_image(pageNum);
286+
}
287+
else if (fallback_bg_renderer)
288+
{
289+
if (fallback_bg_renderer->render_page(cur_doc, pageNum))
290+
fallback_bg_renderer->embed_image(pageNum);
291+
}
254292
}
255293
}
256294

@@ -294,6 +332,13 @@ void HTMLRenderer::endPage() {
294332
{
295333
f_pages.fs << "</div>" << endl;
296334
}
335+
336+
if (param.delay_background)
337+
{
338+
page_cache[this->pageNum] = {
339+
.covered_text_detector = covered_text_detector,
340+
};
341+
}
297342
}
298343

299344
void HTMLRenderer::pre_process(PDFDoc * doc)

pdf2htmlEX/src/Param.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct Param
4646
int printing;
4747
int fallback;
4848
int tmp_file_size_limit;
49+
int delay_background;
4950

5051
// fonts
5152
int embed_external_font;

0 commit comments

Comments
 (0)