@@ -1078,27 +1078,27 @@ function _os2web_cp_service_document_download($file_id) {
1078
1078
* @param string $path_to_pdf
1079
1079
* path to the pdf version of the file
1080
1080
*/
1081
function _os2web_cp_service_document_convert($file_id, $path_to_pdf) {
  // Converts the PDF at $path_to_pdf to HTML with the pdf2htmlEX command line
  // tool and records the outcome for $file_id in the
  // os2web_cp_service_documents_conversion table.

  // If the PDF does not exist, reset the row so the file is sent for
  // redownloading.
  if (!file_exists($path_to_pdf)) {
    db_update('os2web_cp_service_documents_conversion')
      ->fields(array(
        'filepath_pdf' => NULL,
        'status' => NULL,
      ))
      ->condition('file_id', $file_id)
      ->execute();
    return;
  }

  // Each document is converted into its own directory, named after the file
  // id, so that all files produced for it can be removed together later.
  $dest_dir = file_directory_temp() . '/' . $file_id;

  // Both paths are escaped so that spaces or shell metacharacters in them
  // cannot break or alter the command.
  // NOTE(review): escapeshellarg() is locale dependent for non-ASCII bytes;
  // confirm LC_CTYPE is a UTF-8 locale if such paths can occur.
  shell_exec('pdf2htmlEX ' . escapeshellarg($path_to_pdf) . ' --single-html 0 --dest-dir ' . escapeshellarg($dest_dir));

  // The recorded HTML path is the PDF file name with ".html" appended, inside
  // the per-document directory.
  db_update('os2web_cp_service_documents_conversion')
    ->fields(array(
      'filepath_html' => $dest_dir . '/' . drupal_basename($path_to_pdf) . '.html',
      'status' => 'converted',
    ))
    ->condition('file_id', $file_id)
    ->execute();
}
1103
1103
1104
1104
/**
@@ -1123,60 +1123,54 @@ function _os2web_cp_service_document_convert($file_id, $path_to_pdf) {
1123
1123
* Path to the html version of the file
1124
1124
*/
1125
1125
function _os2web_cp_service_update_case_metadata($file_id, $case_nid, $path_to_pdf, $path_to_html) {
  // Appends the plain text of the converted HTML file to the search metadata
  // field of the case node, marks the conversion row for $file_id as done and
  // removes the temporary files.

  // If the HTML does not exist, send the file for reconverting. PDF existence
  // will be checked on that step as well.
  if (!file_exists($path_to_html)) {
    db_update('os2web_cp_service_documents_conversion')
      ->fields(array(
        'filepath_html' => NULL,
        'status' => 'downloaded',
      ))
      ->condition('file_id', $file_id)
      ->execute();
    return;
  }

  $case_node = node_load($case_nid);
  if (!$case_node) {
    db_update('os2web_cp_service_documents_conversion')
      ->fields(array(
        'status' => 'ERROR: node not found',
      ))
      ->condition('file_id', $file_id)
      ->execute();
    return;
  }

  $text = file_get_contents($path_to_html);
  if ($text === FALSE) {
    // The file exists but could not be read. Record the failure and keep the
    // files instead of marking the row as done with nothing indexed.
    db_update('os2web_cp_service_documents_conversion')
      ->fields(array(
        'status' => 'ERROR: html not readable',
      ))
      ->condition('file_id', $file_id)
      ->execute();
    return;
  }

  // Html tags removing.
  // Removing unneeded paragraphs.
  $text = str_replace('<p> </p>', ' ', $text);

  // strip_tags() removes only the tags and keeps the text between them, so
  // the bodies of script and style elements are dropped first.
  $without_blocks = preg_replace(
    array(
      '#<script(.*?)>(.*?)</script>#is',
      '#<style(.*?)>(.*?)</style>#is',
    ),
    ' ',
    $text
  );
  // preg_replace() returns NULL on a PCRE error; keep the text in that case
  // rather than indexing an empty string.
  if ($without_blocks !== NULL) {
    $text = $without_blocks;
  }

  // Collapse every kind of line break into a single space. The array form is
  // applied in order, the same as chained str_replace() calls.
  $text = str_replace(array("\r\n", "\n\r", "\n", "\r"), ' ', strip_tags($text));

  // Append to whatever search metadata the node already holds.
  $existing = '';
  if (isset($case_node->field_os2web_cp_service_searchmt[LANGUAGE_NONE][0]['value'])) {
    $existing = $case_node->field_os2web_cp_service_searchmt[LANGUAGE_NONE][0]['value'];
  }
  $case_node->field_os2web_cp_service_searchmt[LANGUAGE_NONE][0]['value'] = $existing . $text;
  node_save($case_node);

  db_update('os2web_cp_service_documents_conversion')
    ->fields(array(
      'status' => 'done',
    ))
    ->condition('file_id', $file_id)
    ->execute();

  // Tmp files cleanup: the per-document conversion directory and the PDF.
  file_unmanaged_delete_recursive(file_directory_temp() . '/' . $file_id);
  // file_unmanaged_delete() does not raise a PHP warning when the file is
  // already gone, unlike a bare unlink().
  file_unmanaged_delete($path_to_pdf);
}
0 commit comments