-
Notifications
You must be signed in to change notification settings - Fork 930
Open
Description
I ran into this issue as well.
It occurs in detect_text, inside def get_prediction,
on this line: polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
    """Scale every polygon's (x, y) coordinates by the resize ratios.

    The x column is multiplied by ``ratio_w * ratio_net`` and the y column
    by ``ratio_h * ratio_net``.

    Args:
        polys: Sequence of polygons; entries may be ``None``.
        ratio_w: Horizontal scale factor.
        ratio_h: Vertical scale factor.
        ratio_net: Extra factor applied to both axes (default 2).

    Returns:
        ``polys`` unchanged when it is empty, otherwise a NumPy array built
        from ``polys`` with every non-``None`` entry scaled in place.

    Note:
        The whole list is converted with ``np.array(polys)``, so polygons
        of differing shapes (e.g. one ``(14, 2)`` among ``(4, 2)`` ones)
        make that conversion fail on NumPy versions that reject ragged
        input. This is the behaviour reported in this issue.
    """
    # Nothing to scale: hand the input straight back.
    if len(polys) == 0:
        return polys

    polys = np.array(polys)
    scale = (ratio_w * ratio_net, ratio_h * ratio_net)
    for idx, entry in enumerate(polys):
        if entry is not None:
            polys[idx] *= scale
    return polys
fixed by:
def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
    """Scale each polygon independently so differing shapes are supported.

    Every polygon is converted to its own NumPy array and multiplied by
    ``(ratio_w * ratio_net, ratio_h * ratio_net)``. The polygons are never
    stacked into a single array, so a ``(14, 2)`` polygon can sit next to
    ``(4, 2)`` ones without a shape error.

    Args:
        polys: Sequence of polygons; entries may be ``None``.
        ratio_w: Horizontal scale factor.
        ratio_h: Vertical scale factor.
        ratio_net: Extra factor applied to both axes (default 2).

    Returns:
        A new list with the same length and order as ``polys``. Each
        non-``None`` entry is a freshly scaled NumPy array; ``None``
        entries stay ``None``.
    """
    scale = (ratio_w * ratio_net, ratio_h * ratio_net)
    # Keep None placeholders in position: get_prediction substitutes
    # boxes[k] for polys[k] when it is None, which only works while both
    # sequences stay index-aligned. Dropping them would shift later entries.
    return [
        None if poly is None else np.array(poly) * scale
        for poly in polys
    ]
and by updating def get_prediction as follows:
def get_prediction(
    image,
    craft_net,
    refine_net=None,
    text_threshold: float = 0.7,
    link_threshold: float = 0.4,
    low_text: float = 0.4,
    cuda: bool = False,
    long_size: int = 1280,
    poly: bool = True,
):
    """Run text detection on one image and return boxes, polygons and timings.

    Pipeline: read the image, resize it, normalise it, run ``craft_net``
    (and ``refine_net`` when given), extract boxes/polygons from the score
    maps, rescale them with the inverse resize ratio, and express them as
    ratios of the image size.

    Args:
        image: Input handed to ``image_utils.read_image``.
        craft_net: Callable network returning ``(y, feature)``.
        refine_net: Optional network called as ``refine_net(y, feature)``;
            when given, its output replaces the link score map.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When true, the input tensor is moved with ``.cuda()``.
        long_size: Forwarded to ``image_utils.resize_aspect_ratio``.
        poly: Forwarded to ``craft_utils.getDetBoxes``.

    Returns:
        Dict with keys ``"boxes"``, ``"boxes_as_ratios"``, ``"polys"``,
        ``"polys_as_ratios"``, ``"heatmaps"`` (text and link heatmaps) and
        ``"times"`` (seconds spent in each stage).
    """
    stage_start = time.time()

    # read/convert image, then resize
    image = image_utils.read_image(image)
    img_resized, target_ratio, size_heatmap = image_utils.resize_aspect_ratio(
        image, long_size, interpolation=cv2.INTER_LINEAR
    )
    # The same inverse ratio is used for both axes.
    ratio_h = ratio_w = 1 / target_ratio
    resize_time = time.time() - stage_start
    stage_start = time.time()

    # preprocessing
    x = image_utils.normalizeMeanVariance(img_resized)
    x = torch_utils.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = torch_utils.Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()
    preprocessing_time = time.time() - stage_start
    stage_start = time.time()

    # forward pass
    with torch_utils.no_grad():
        y, feature = craft_net(x)
    craftnet_time = time.time() - stage_start
    stage_start = time.time()

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch_utils.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()
    # This interval also covers the score-map extraction above and is
    # recorded even when no refine_net is supplied.
    refinenet_time = time.time() - stage_start
    stage_start = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(
        score_text, score_link, text_threshold, link_threshold, low_text, poly
    )

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    # Fall back to the box wherever no polygon is available.
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    # image size as [width, height], matching the (x, y) coordinate order
    image_size = [image.shape[1], image.shape[0]]

    # box and poly coords as ratios to image size
    boxes_as_ratio = np.array([box / image_size for box in boxes])
    # Use dtype=object to handle varying shapes
    polys_as_ratio = np.array(
        [polygon / image_size for polygon in polys], dtype=object
    )

    text_score_heatmap = image_utils.cvt2HeatmapImg(score_text)
    link_score_heatmap = image_utils.cvt2HeatmapImg(score_link)
    postprocess_time = time.time() - stage_start

    return {
        "boxes": boxes,
        "boxes_as_ratios": boxes_as_ratio,
        "polys": polys,
        "polys_as_ratios": polys_as_ratio,
        "heatmaps": {
            "text_score_heatmap": text_score_heatmap,
            "link_score_heatmap": link_score_heatmap,
        },
        "times": {
            "resize_time": resize_time,
            "preprocessing_time": preprocessing_time,
            "craftnet_time": craftnet_time,
            "refinenet_time": refinenet_time,
            "postprocess_time": postprocess_time,
        },
    }
"""
The issue lies in the inconsistent shapes of the polygons (polys). Specifically, Poly 3 has a shape of (14, 2), while the others have a shape of (4, 2). This inconsistency causes the error when attempting to convert the list of polygons to a NumPy array.
To handle this, we need to ensure that all polygons have a consistent shape or handle them differently if they are of varying shapes. We can either pad the polygons to the same size or handle them in a way that does not require converting them to a single NumPy array.
"""
Metadata
Metadata
Assignees
Labels
No labels