Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 24 additions & 27 deletions src/lib_ccx/ocr.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#include "ccx_encoders_helpers.h"
#include "ccx_encoders_spupng.h"
#include "ocr.h"
#undef OCR_DEBUG

struct ocrCtx
{
Expand Down Expand Up @@ -686,7 +685,6 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
TessResultIteratorDelete(ri);
}
// End Color Detection
freep(&text_out);
boxDestroy(&crop_points);

pixDestroy(&pix);
Expand All @@ -698,47 +696,31 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
return text_out;
}

void erode(png_color *palette, png_byte *alpha, uint8_t *bitmap, int w, int h, int nb_color)
void erode(png_color *palette, png_byte *alpha, uint8_t *bitmap, int w, int h, int nb_color, int background_index)
{
int background_index;
for (background_index = 0; background_index < nb_color; background_index++)
{
if (alpha[background_index])
{
break;
}
}
// we will use a 2*2 kernel for the erosion
for (int row = 0; row < h - 1; row++)
{
for (int col = 0; col < w - 1; col++)
{
if (alpha[bitmap[row * w + col]] || alpha[bitmap[(row + 1) * w + col]] ||
alpha[bitmap[row * w + (col + 1)]] || alpha[bitmap[(row + 1) * w + (col + 1)]])
if (bitmap[row * w + col] == background_index || bitmap[(row + 1) * w + col] == background_index ||
bitmap[row * w + (col + 1)] == background_index || bitmap[(row + 1) * w + (col + 1)] == background_index)
{
bitmap[row * w + col] = background_index;
}
}
}
}

void dilate(png_color *palette, png_byte *alpha, uint8_t *bitmap, int w, int h, int nb_color)
void dilate(png_color *palette, png_byte *alpha, uint8_t *bitmap, int w, int h, int nb_color, int foreground_index)
{
int foreground_index;
for (foreground_index = 0; foreground_index < nb_color; foreground_index++)
{
if (!alpha[foreground_index])
{
break;
}
}
// we will use a 2*2 kernel for the erosion
for (int row = 0; row < h - 1; row++)
{
for (int col = 0; col < w - 1; col++)
{
if (!(alpha[bitmap[row * w + col]] && alpha[bitmap[(row + 1) * w + col]] &&
alpha[bitmap[row * w + (col + 1)]] && alpha[bitmap[(row + 1) * w + (col + 1)]]))
if ((bitmap[row * w + col] == foreground_index && bitmap[(row + 1) * w + col] == foreground_index &&
bitmap[row * w + (col + 1)] == foreground_index && bitmap[(row + 1) * w + (col + 1)] == foreground_index))
{
bitmap[row * w + col] = foreground_index;
}
Expand Down Expand Up @@ -769,6 +751,7 @@ static int quantize_map(png_byte *alpha, png_color *palette,
*/
uint32_t *mcit = NULL;
struct transIntensity ti = {alpha, palette};
int text_color, text_bg_color;

int ret = 0;

Expand Down Expand Up @@ -835,6 +818,14 @@ static int quantize_map(png_byte *alpha, png_color *palette,
max_ind = j;
}
}

// Assume second most frequent color to be text background (first is alpha channel)
if (i == 1)
text_bg_color = iot[max_ind];
// Assume third most frequent color to be text color
if (i == 2)
text_color = iot[max_ind];

for (j = i; j > 0 && max_ind < mcit[j - 1]; j--)
{
mcit[j] = mcit[j - 1];
Expand Down Expand Up @@ -878,8 +869,8 @@ static int quantize_map(png_byte *alpha, png_color *palette,
palette[iot[i]].green = palette[index].green;
}
}
erode(palette, alpha, bitmap, w, h, nb_color);
dilate(palette, alpha, bitmap, w, h, nb_color);
erode(palette, alpha, bitmap, w, h, nb_color, text_bg_color);
dilate(palette, alpha, bitmap, w, h, nb_color, text_color);
#ifdef OCR_DEBUG
ccx_common_logging.log_ftn("Colors present in quantized Image\n");
for (int i = 0; i < nb_color; i++)
Expand Down Expand Up @@ -1062,7 +1053,13 @@ char *paraof_ocrtext(struct cc_subtitle *sub, struct encoder_ctx *context)
len += strlen(rect->ocr_text);
}
if (len <= 0)
{
for (i = 0, rect = sub->data; i < sub->nb_data; i++, rect++)
{
freep(&rect->ocr_text);
}
return NULL;
}
else
{
str = malloc(len + 1 + 10); // Extra space for possible trailing '/n's at the end of tesseract UTF8 text
Expand All @@ -1076,7 +1073,7 @@ char *paraof_ocrtext(struct cc_subtitle *sub, struct encoder_ctx *context)
if (!rect->ocr_text)
continue;
add_ocrtext2str(str, rect->ocr_text, context->encoded_crlf, context->encoded_crlf_length);
free(rect->ocr_text);
freep(&rect->ocr_text);
}
return str;
}
Expand Down
Loading