Skip to content

Commit 561c403

Browse files
author
Stanislav
committed
pdf2html is now converting into another directory, and splitting file
1 parent 3c387f4 commit 561c403

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

os2web_cp_service.module

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,10 +1023,12 @@ function _os2web_cp_service_document_convert($file_id, $path_to_pdf){
10231023
->condition('file_id', $file_id)
10241024
->execute();
10251025
} else {
1026-
shell_exec('pdf2htmlEX ' . $path_to_pdf . ' --dest-dir ' . file_directory_temp());
1026+
$file_name = explode('/', $path_to_pdf);
1027+
1028+
shell_exec('pdf2htmlEX ' . $path_to_pdf . ' --single-html 0 --dest-dir ' . file_directory_temp() . '/' . $file_id);//saving in separate directory
10271029
db_update('os2web_cp_service_documents_conversion')
10281030
->fields(array(
1029-
'filepath_html' => $path_to_pdf . '.html',
1031+
'filepath_html' => file_directory_temp() . '/' . $file_id . '/' . end($file_name) . '.html',
10301032
'status' => 'converted',
10311033
))
10321034
->condition('file_id', $file_id)
@@ -1071,7 +1073,7 @@ function _os2web_cp_service_update_case_metadata($file_id, $case_nid, $path_to_p
10711073

10721074
//html tags removing
10731075
$text = str_replace('<p>&nbsp;</p>', ' ', $text); //removing unneeded paragraphs
1074-
$text = preg_replace('#<style(.*?)>(.*?)</style>#is', ' ', $text);//removing style tags
1076+
//$text = preg_replace('#<style(.*?)>(.*?)</style>#is', ' ', $text);//removing style tags
10751077
$text = preg_replace('#<script(.*?)>(.*?)</script>#is', ' ', $text);//removing scripts tags
10761078
$text = str_replace("\r\n", " ", strip_tags($text));
10771079
$text = str_replace("\n\r", " ", $text);
@@ -1090,7 +1092,7 @@ function _os2web_cp_service_update_case_metadata($file_id, $case_nid, $path_to_p
10901092
->execute();
10911093

10921094
//tmp files cleanup
1093-
unlink($path_to_html);
1095+
file_unmanaged_delete_recursive(file_directory_temp() . '/' . $file_id);
10941096
unlink($path_to_pdf);
10951097
}
10961098
}

0 commit comments

Comments
 (0)