@@ -27,14 +27,17 @@ namespace pdflib
2727
2828 bool is_adjacent_to (pdf_resource<PAGE_CELL>& other, double delta);
2929
30+ bool has_same_reading_orientation (pdf_resource<PAGE_CELL>& other);
31+
3032 bool merge_with (pdf_resource<PAGE_CELL>& other, double delta);
3133
3234 public:
3335
3436 static std::vector<std::string> header;
3537
3638 bool active;
37-
39+ bool left_to_right;
40+
3841 double x0;
3942 double y0;
4043 double x1;
@@ -81,7 +84,8 @@ namespace pdflib
8184 };
8285
// Default constructor for the PAGE_CELL resource.
// After this change a freshly constructed cell is flagged both `active` and
// `left_to_right`; no other member is initialised in this initializer list.
8386 pdf_resource<PAGE_CELL>::pdf_resource():
84- active (true )
87+ active (true ),
88+ left_to_right(true )
8589 {}
8690
8791 pdf_resource<PAGE_CELL>::~pdf_resource ()
@@ -128,7 +132,8 @@ namespace pdflib
128132 // "block-count",
129133 // "instr-count",
130134
131- " widget"
135+ " widget" ,
136+ " left_to_right"
132137 };
133138
134139 void pdf_resource<PAGE_CELL>::rotate(int angle, std::pair<double , double > delta)
@@ -180,7 +185,8 @@ namespace pdflib
180185 cell.push_back (font_key); // 17
181186 cell.push_back (font_name); // 18
182187
183- cell.push_back (widget); // 19
188+ cell.push_back (widget); // 19
189+ cell.push_back (left_to_right); // 20
184190 }
185191 assert (cell.size ()==header.size ());
186192
@@ -219,6 +225,7 @@ namespace pdflib
219225 font_name = data.at (18 ).get <std::string>();
220226
221227 widget = data.at (19 ).get <bool >();
228+ left_to_right = data.at (20 ).get <bool >();
222229
223230 return true ;
224231 }
@@ -255,26 +262,51 @@ namespace pdflib
255262
// Tests whether `other` starts where this cell ends.
//
// Two Euclidean distances are computed:
//   d0: from this cell's (r_x1, r_y1) to other's (r_x0, r_y0)
//   d1: from this cell's (r_x2, r_y2) to other's (r_x3, r_y3)
// and the cells count as adjacent only when BOTH are strictly below `eps`.
//
// @param other  candidate neighbouring cell (only its r_* points are read here)
// @param eps    distance threshold; the in-class declaration names this
//               parameter `delta`
// @return       true when d0 < eps and d1 < eps
//
// NOTE(review): presumably (r_x1,r_y1)/(r_x2,r_y2) are this cell's trailing
// corners and (r_x0,r_y0)/(r_x3,r_y3) the other cell's leading corners of a
// rotated bounding box -- confirm against the member declarations.
256263 bool pdf_resource<PAGE_CELL>::is_adjacent_to(pdf_resource<PAGE_CELL>& other, double eps)
257264 {
258- // if(eps<0.0)
259- // {
260- // eps = average_char_width()/2.0;
261- // }
262-
263265 double d0 = std::sqrt ((r_x1-other.r_x0 )*(r_x1-other.r_x0 ) + (r_y1-other.r_y0 )*(r_y1-other.r_y0 ));
264266 double d1 = std::sqrt ((r_x2-other.r_x3 )*(r_x2-other.r_x3 ) + (r_y2-other.r_y3 )*(r_y2-other.r_y3 ));
265267
266268 return ((d0<eps) and (d1<eps));
267269 }
268270
// Decides whether this cell and `other` are compatible in reading direction.
//
// Returns true when the two `left_to_right` flags are equal, or when the text
// of either cell is accepted by utils::string::is_punctuation_or_space. In the
// latter case the direction flags are not compared at all.
//
// @param other  cell to compare against (its `text` and `left_to_right` are read)
// @return       true if the flags match or either text is punctuation/space
//
// NOTE(review): the exact character classes accepted by
// is_punctuation_or_space are defined outside this view -- confirm before
// relying on them. The commented-out return below is the earlier variant that
// exempted only a literal single-space text.
271+ bool pdf_resource<PAGE_CELL>::has_same_reading_orientation(pdf_resource<PAGE_CELL>& other)
272+ {
273+ // it might need is_punctuation function instead of just the space
274+ bool is_punc = utils::string::is_punctuation_or_space (text);
275+ bool other_is_punc = utils::string::is_punctuation_or_space (other.text );
276+
277+ // return ((left_to_right==other.left_to_right) or (text==" " or other.text==" "));
278+ return ((left_to_right==other.left_to_right ) or (is_punc or other_is_punc));
279+ }
280+
269281 bool pdf_resource<PAGE_CELL>::merge_with(pdf_resource<PAGE_CELL>& other, double delta)
270282 {
283+ if (not has_same_reading_orientation (other))
284+ {
285+ LOG_S (ERROR) << " inconsistent merging of cells!" ;
286+ }
287+
271288 double d0 = std::sqrt ((r_x1-other.r_x0 )*(r_x1-other.r_x0 ) + (r_y1-other.r_y0 )*(r_y1-other.r_y0 ));
272289
273- if (delta<d0)
290+ if ((not left_to_right) or (not other.left_to_right ))
291+ {
292+ if (delta<d0)
293+ {
294+ text = " " + text;
295+ }
296+ text = other.text + text;
297+
298+ left_to_right = false ;
299+ }
300+ else
274301 {
275- text += " " ;
276- }
277- text += other.text ;
302+ if (delta<d0)
303+ {
304+ text += " " ;
305+ }
306+ text += other.text ;
307+
308+ left_to_right = true ;
309+ }
278310
279311 r_x1 = other.r_x1 ;
280312 r_y1 = other.r_y1 ;
0 commit comments