@@ -312,52 +312,68 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
312
312
for i , new_line_poly in enumerate (new_line_polygons ):
313
313
for j , line_poly in enumerate (line_polygons ):
314
314
# too strict: .contains
315
- if line_poly .intersects (new_line_poly ):
316
- inter = make_intersection (line_poly .context , new_line_poly )
317
- if not inter :
318
- continue
319
- new_line_mask = (new_line_labels == i + 1 ) & parent_bin
320
- line_mask = line_labels [j ] & parent_bin
321
- inter_mask = new_line_mask & line_mask
322
- if (not np .count_nonzero (inter_mask ) or
323
- not np .count_nonzero (new_line_mask ) or
324
- not np .count_nonzero (line_mask )):
325
- continue
326
- intersections [(i , j )] = inter
327
- fits_bg [i , j ] = inter .area / new_line_poly .area
328
- covers_bg [i , j ] = inter .area / line_poly .context .area
329
- fits_fg [i , j ] = np .count_nonzero (inter_mask ) / np .count_nonzero (new_line_mask )
330
- covers_fg [i , j ] = np .count_nonzero (inter_mask ) / np .count_nonzero (line_mask )
331
- # LOG.debug("new %d old %d (%s): %.1f%% / %.1f%% bg, %.1f%% / %.1f%% fg",
332
- # i, j, lines[j].id,
333
- # fits_bg[i,j]*100, covers_bg[i,j]*100,
334
- # fits_fg[i,j]*100, covers_fg[i,j]*100)
335
- # assign new lines to existing lines, if possible
336
- assignments = np .ones (len (new_line_polygons ), int ) * - 1
337
- for i , new_line_poly in enumerate (new_line_polygons ):
338
- if not fits_bg [i ].any ():
339
- LOG .debug ("new line %d fits no existing line's background" , i )
340
- continue
341
- if not fits_fg [i ].any ():
342
- LOG .debug ("new line %d fits no existing line's foreground" , i )
343
- continue
344
- fits = (fits_bg [i ] > 0.6 ) & (fits_fg [i ] > 0.9 )
345
- if not fits .any ():
346
- j = np .argmax (fits_bg [i ] * fits_fg [i ])
347
- LOG .debug ("best fit '%s' for new line %d fits only %.1f%% bg / %.1f%% fg" ,
348
- lines [j ].id , i , fits_bg [i ,j ] * 100 , fits_fg [i ,j ] * 100 )
315
+ if not line_poly .intersects (new_line_poly ):
316
+ continue
317
+ inter = make_intersection (line_poly .context , new_line_poly )
318
+ if not inter :
319
+ continue
320
+ new_line_mask = (new_line_labels == i + 1 ) & parent_bin
321
+ line_mask = line_labels [j ] & parent_bin
322
+ inter_mask = new_line_mask & line_mask
323
+ if (not np .count_nonzero (inter_mask ) or
324
+ not np .count_nonzero (new_line_mask ) or
325
+ not np .count_nonzero (line_mask )):
326
+ continue
327
+ intersections [(i , j )] = inter
328
+ fits_bg [i , j ] = inter .area / new_line_poly .area
329
+ covers_bg [i , j ] = inter .area / line_poly .context .area
330
+ fits_fg [i , j ] = np .count_nonzero (inter_mask ) / np .count_nonzero (new_line_mask )
331
+ covers_fg [i , j ] = np .count_nonzero (inter_mask ) / np .count_nonzero (line_mask )
332
+ # LOG.debug("new %d old %d (%s): %.1f%% / %.1f%% bg, %.1f%% / %.1f%% fg",
333
+ # i, j, lines[j].id,
334
+ # fits_bg[i,j]*100, covers_bg[i,j]*100,
335
+ # fits_fg[i,j]*100, covers_fg[i,j]*100)
336
+ # assign existing lines to new lines (1:n), if possible
337
+ # start from best matches (forced alignment)
338
+ dim1 = len (new_line_polygons )
339
+ dim2 = len (line_polygons )
340
+ idx1 = np .arange (dim1 )
341
+ idx2 = np .arange (dim2 )
342
+ keep1 = np .ones (dim1 , bool )
343
+ keep2 = np .ones (dim2 , bool )
344
+ assignments = - 1 * np .ones (dim1 , int )
345
+ for _ in range (dim1 ):
346
+ fit_bg_view = fits_bg [np .ix_ (keep1 , keep2 )]
347
+ if not fit_bg_view .size :
348
+ break
349
+ cov_bg_view = covers_bg [np .ix_ (keep1 , keep2 )]
350
+ fit_fg_view = fits_fg [np .ix_ (keep1 , keep2 )]
351
+ cov_fg_view = covers_fg [np .ix_ (keep1 , keep2 )]
352
+ priority = cov_fg_view * cov_bg_view
353
+ ind1 , ind2 = np .unravel_index (np .argmax (priority , axis = None ), priority .shape )
354
+ fit_fg = fit_fg_view [ind1 , ind2 ]
355
+ fit_bg = fit_bg_view [ind1 , ind2 ]
356
+ cov_fg = cov_fg_view [ind1 , ind2 ]
357
+ cov_bg = cov_bg_view [ind1 , ind2 ]
358
+ # return to full view and assign next
359
+ ind1 = idx1 [keep1 ][ind1 ]
360
+ ind2 = idx2 [keep2 ][ind2 ]
361
+ #new_poly = new_line_polygons[ind1]
362
+ #poly = line_polygons[ind2]
363
+ # assignment must be new
364
+ assert assignments [ind1 ] < 0
365
+ assert keep1 [ind1 ]
366
+ assert keep2 [ind2 ]
367
+ # minimum threshold
368
+ if not (fit_bg > 0.6 and fit_fg > 0.7 ):
369
+ # skip next time
370
+ # LOG.debug("match for %s too large: %d%%fg / %d%%bg", lines[ind2].id, fit_fg*100, fit_bg*100)
371
+ covers_bg [ind1 , ind2 ] = 0
372
+ covers_fg [ind1 , ind2 ] = 0
349
373
continue
350
- covers = covers_bg [i ] * covers_fg [i ] * fits
351
- j = np .argmax (covers )
352
- line = lines [j ]
353
- inter_polygon = intersections [(i ,j )]
354
- new_line_polygon = new_line_polygons [i ]
355
- new_center = inter_polygon .centroid
356
- center = new_line_polygon .centroid
357
- # FIXME: apply reasonable threshold for centroid distance
358
- LOG .debug ("new line for '%s' has centroid distance %.2f" ,
359
- line .id , center .distance (new_center ))
360
- assignments [i ] = j
374
+ assignments [ind1 ] = ind2
375
+ keep1 [ind1 ] = False
376
+ #keep2[ind2] = False
361
377
# validate assignments retain enough area and do not lose unassigned matches
362
378
line_polygons = [poly .context .buffer (- margin ) for poly in line_polygons ]
363
379
for j , line in enumerate (lines ):
0 commit comments