Skip to content

Commit 8ab088d

Browse files
fix: resolve segfaults (#8)
* first working version to parse page-by-page Signed-off-by: Peter Staar <[email protected]> * added the read page-by-page using bytesio Signed-off-by: Peter Staar <[email protected]> * fixed the segfault (caught weird parameter representation of form d.dd-dddd with d in [0,9]) Signed-off-by: Peter Staar <[email protected]> --------- Signed-off-by: Peter Staar <[email protected]>
1 parent 40168ae commit 8ab088d

File tree

1 file changed

+41
-6
lines changed
  • src/proj_folders/pdf_library/qpdf/parser

1 file changed

+41
-6
lines changed

src/proj_folders/pdf_library/qpdf/parser/stream.h

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -417,21 +417,40 @@ namespace pdf_lib
417417
{
418418
std::string op = object.getOperatorValue();
419419

420-
std::regex expr("[0-9]+\\.?[0-9]*?");
421-
if(std::regex_match(op, expr))
420+
std::regex expr_01("[0-9]+\\.?[0-9]*?");
421+
if(std::regex_match(op, expr_01))
422422
{
423423
std::string::size_type sz;
424424
float value = std::stof(op, &sz);
425425

426426
return value;
427427
}
428+
429+
std::regex expr_02("([0-9]+\\.[0-9]+)\\-([0-9]+)");
430+
std::smatch match;
431+
if(std::regex_match(op, match, expr_02))
432+
{
433+
logging_lib::warn("pdf-parser") << __FILE__ << ":" << __LINE__
434+
<< "\t--> re-identified " << op << " as parameter with value: "
435+
<< match[1].str();
436+
437+
std::string tmp = match[1].str();
438+
439+
std::string::size_type sz;
440+
float value = std::stof(tmp, &sz);
441+
442+
return value;
443+
}
428444
}
429445
else
430446
{
431447
float value = object.getNumericValue();
432448
return value;
433449
}
434450

451+
logging_lib::error("pdf-parser") << __FILE__ << ":" << __LINE__ << "\t" << __FUNCTION__
452+
<< "could not interprete a parameter correctly!";
453+
435454
return 0;
436455
}
437456

@@ -557,8 +576,9 @@ namespace pdf_lib
557576
// FIXME: QPDF sees numbers sometimes as operators. This is clearly wrong.
558577
// With this trick, we try to circumvent the problem. The clean fix however
559578
// is to fix the QPDF-library
560-
std::regex expr("\\-?[0-9]+\\.?[0-9]*?");
561-
if(std::regex_match(op, expr))
579+
std::regex expr_01("\\-?[0-9]+\\.?[0-9]*?");
580+
581+
if(std::regex_match(op, expr_01))
562582
{
563583
logging_lib::warn("pdf-parser") << __FILE__ << ":" << __LINE__
564584
<< "\t--> re-identified as parameter!";
@@ -567,6 +587,19 @@ namespace pdf_lib
567587
}
568588
}
569589

590+
// weird parameter of shape: \d+\.\d+\-\d+ (e.g. 0.00-80)
591+
{
592+
std::regex expr_01("([0-9]+\\.[0-9]+)\\-([0-9]+)");
593+
std::smatch match;
594+
if(std::regex_match(op, match, expr_01))
595+
{
596+
logging_lib::warn("pdf-parser") << __FILE__ << ":" << __LINE__
597+
<< "\t--> re-identified " << op << "as parameter";
598+
_parameters.push_back(object);
599+
return;
600+
}
601+
}
602+
570603
// Deal with cases such as `Do1`, where an operator and parameter are "glued" together ...
571604

572605
std::string val="null";
@@ -575,10 +608,12 @@ namespace pdf_lib
575608
logging_lib::warn("pdf-parser") << __FILE__ << ":" << __LINE__ << "\t"
576609
<< "unknown operator: " << op;
577610

578-
std::regex expr("([A-Za-z]+)(\\-?[0-9]+\\.?[0-9]*?)");
611+
std::regex expr_01("([A-Za-z]+)(\\-?[0-9]+\\.?[0-9]*?)"); // concatenated operator-parameter
612+
//std::regex expr_02("(\d+\.\d+)\-(\d+)");
613+
579614
std::smatch match;
580615

581-
if(std::regex_match(op, match, expr))
616+
if(std::regex_match(op, match, expr_01))
582617
{
583618
std::string op_ = match[1].str();
584619
std::string val_ = match[2].str();

0 commit comments

Comments
 (0)