OS2WebCore
diff --git a/‎css/os2web_cp_service.css
100644100755 b/‎css/os2web_cp_service.css
100644100755
diff --git a/‎images/cal.png
100644100755 b/‎images/cal.png
100644100755
diff --git a/‎js/jquery.qtip-1.0.0-rc3.min.js
100644100755 b/‎js/jquery.qtip-1.0.0-rc3.min.js
100644100755
diff --git a/‎js/os2web_cp_service.js
100644100755 b/‎js/os2web_cp_service.js
100644100755
diff --git a/‎os2web_cp_service.info
100644100755
Lines changed: 1 addition & 1 deletion b/‎os2web_cp_service.info
100644100755
Lines changed: 1 addition & 1 deletion
diff --git a/‎os2web_cp_service.install
100644100755
Lines changed: 39 additions & 0 deletions b/‎os2web_cp_service.install
100644100755
Lines changed: 39 additions & 0 deletions
diff --git a/‎os2web_cp_service.make
100644100755 b/‎os2web_cp_service.make
100644100755
diff --git a/‎os2web_cp_service.module
100644100755
Lines changed: 208 additions & 3 deletions b/‎os2web_cp_service.module
100644100755
Lines changed: 208 additions & 3 deletions
diff --git a/‎os2web_cp_service.strongarm.inc
Lines changed: 1 addition & 1 deletion b/‎os2web_cp_service.strongarm.inc
Lines changed: 1 addition & 1 deletion
diff --git a/‎theme/views-exposed-form--os2web-cp-service-cp-case-search.tpl.php
100644100755 b/‎theme/views-exposed-form--os2web-cp-service-cp-case-search.tpl.php
100644100755
@@ -17,7 +17,7 @@ dependencies[] = menu
 dependencies[] = node
 dependencies[] = number
 dependencies[] = options
-dependencies[] = os2web_acadre_esdh
+;dependencies[] = os2web_acadre_esdh
 dependencies[] = page_manager
 dependencies[] = panels
 dependencies[] = pathauto
 
@@ -19,3 +19,42 @@ function os2web_cp_service_install() {
   //  taxonomy_term_save($term);
   // }
 }
+
+/**
+ * Implements hook_schema().
+ *
+ * Defines the queue table that drives the PDF -> HTML conversion of CP
+ * documents. One row is written per (document file, case) pair. The 'status'
+ * column tracks the progress of the row: NULL (scheduled), 'downloaded',
+ * 'converted', 'done', or an 'ERROR: ...' message.
+ *
+ * A case can reference several documents, so 'case_nid' cannot be the primary
+ * key: the second document of a case would fail with a duplicate key error.
+ * A serial 'id' column is used as primary key instead. 'file_id' is too long
+ * to be fully indexed, hence the prefix indexes.
+ *
+ * The columns hold plain strings (they are written with db_insert() and
+ * db_update()), so they are not flagged as serialized.
+ *
+ * @return array
+ *   The schema definition, keyed by table name.
+ */
+function os2web_cp_service_schema() {
+  $schema['os2web_cp_service_documents_conversion'] = array(
+    'description' => 'This table is used as schedule for PDF -> HTML convertion of CP Documents',
+    'fields' => array(
+      'id' => array(
+        'description' => 'Primary identifier of the conversion job',
+        'type' => 'serial',
+        'unsigned' => TRUE,
+        'not null' => TRUE,
+      ),
+      'file_id' => array(
+        'description' => 'The ID of the document file located remotely',
+        'type' => 'varchar',
+        'length' => 1024,
+      ),
+      'case_nid' => array(
+        'description' => 'The nid of the CP Case, which metadata field should be updated with file content',
+        'type' => 'int',
+        'unsigned' => TRUE,
+        'not null' => TRUE,
+      ),
+      'filepath_pdf' => array(
+        'description' => 'The path of the downloaded PDF',
+        'type' => 'varchar',
+        'length' => 1024,
+      ),
+      'filepath_html' => array(
+        'description' => 'The path of the created HTML output',
+        'type' => 'varchar',
+        'length' => 1024,
+      ),
+      'status' => array(
+        'description' => 'The information about the job',
+        'type' => 'varchar',
+        'length' => 1024,
+      ),
+    ),
+    'primary key' => array('id'),
+    'indexes' => array(
+      'case_nid' => array('case_nid'),
+      // Prefix lengths keep the index within the database key size limits.
+      'file_id' => array(array('file_id', 191)),
+      'status' => array(array('status', 32)),
+    ),
+  );
+  return $schema;
+}
@@ -6,6 +6,7 @@
  */
 
 include_once 'os2web_cp_service.features.inc';
+// Maximum number of queued documents that each cron stage (download, convert,
+// metadata update) processes in one run.
+define('DEFAULT_CASE_FILE_LIMIT', 50);
 
 /**
  * Implements hook_init().
@@ -73,9 +74,7 @@ function os2web_cp_service_handler() {
  * Callback for the file provider service.
  */
 function os2web_gf_service_handler($file_id) {
-
   if ($url = variable_get('os2web_cp_service_cp_document_fileurl')) {
-
     $username = variable_get('os2web_cp_service_endpoint_user');
     $password = variable_get('os2web_cp_service_endpoint_password');
     if (!empty($username) && !empty($password)) {
@@ -114,7 +113,6 @@ function os2web_gf_service_handler($file_id) {
         $nids = (isset($result['node']))?array_keys($result['node']) : NULL;
 
         $node = node_load(array_pop($nids));
-
         if ($node) {
           $filename = str_replace('/', '_', $node->field_os2web_cp_service_doc_id[LANGUAGE_NONE][0]['value'] . '.' . os2web_cp_service_get_extension_from_mime($header['content_type']));
           drupal_add_http_header('Content-Disposition', 'attachment; filename=' . $filename);
@@ -528,6 +526,8 @@ function os2web_cp_service_create_document(array $data) {
         }
         if ($is_missing) {
           $cnode->field_os2web_cp_service_doc_ref[LANGUAGE_NONE][]['target_id'] = $node->nid;
+          //adding for pdf 2 html conversion, [email protected]
+          os2web_cp_service_schedule_document_pdf2html_conversion($data['fields']['FilID'], $cnode->nid);
           node_save($cnode);
         }
       }
@@ -912,3 +912,208 @@ function os2web_cp_service_os2web_help($sections) {
   return $sections;
 
 }
+/**
+ * Queues a document for PDF to HTML conversion.
+ *
+ * Adds one row to the os2web_cp_service_documents_conversion table. The file
+ * paths and the status are stored as NULL; os2web_cp_service_cron() picks up
+ * rows whose status is NULL for download.
+ *
+ * @param string $file_id
+ *   Id of the file on the remote server.
+ * @param int $case_nid
+ *   Nid of the destination case whose metadata field should be updated.
+ */
+function os2web_cp_service_schedule_document_pdf2html_conversion($file_id, $case_nid) {
+  // A freshly scheduled job has no local files and no status yet.
+  $job = array(
+    'file_id' => $file_id,
+    'case_nid' => $case_nid,
+    'filepath_pdf' => NULL,
+    'filepath_html' => NULL,
+    'status' => NULL,
+  );
+
+  $insert = db_insert('os2web_cp_service_documents_conversion');
+  $insert->fields($job);
+  $insert->execute();
+}
+
+/**
+ * Implements hook_cron().
+ *
+ * Walks the conversion queue table in three stages, each limited to
+ * DEFAULT_CASE_FILE_LIMIT rows:
+ * - rows without a status are downloaded as PDF,
+ * - rows with status "downloaded" are converted from PDF to HTML,
+ * - rows with status "converted" have their text appended to the case
+ *   metadata field.
+ */
+function os2web_cp_service_cron() {
+  $table = 'os2web_cp_service_documents_conversion';
+
+  // Stage 1: download every job that has not been touched yet.
+  $scheduled = db_select($table, 'dc')
+    ->fields('dc', array('file_id'))
+    ->isNull('dc.status')
+    ->range(0, DEFAULT_CASE_FILE_LIMIT)
+    ->execute();
+  foreach ($scheduled as $job) {
+    _os2web_cp_service_document_download($job->file_id);
+  }
+
+  // Stage 2: convert the downloaded PDF files.
+  $downloaded = db_select($table, 'dc')
+    ->fields('dc', array('file_id', 'filepath_pdf'))
+    ->condition('dc.status', 'downloaded')
+    ->range(0, DEFAULT_CASE_FILE_LIMIT)
+    ->execute();
+  foreach ($downloaded as $job) {
+    _os2web_cp_service_document_convert($job->file_id, $job->filepath_pdf);
+  }
+
+  // Stage 3: push the converted text into the case metadata field.
+  $converted = db_select($table, 'dc')
+    ->fields('dc', array('file_id', 'case_nid', 'filepath_pdf', 'filepath_html'))
+    ->condition('dc.status', 'converted')
+    ->range(0, DEFAULT_CASE_FILE_LIMIT)
+    ->execute();
+  foreach ($converted as $job) {
+    _os2web_cp_service_update_case_metadata($job->file_id, $job->case_nid, $job->filepath_pdf, $job->filepath_html);
+  }
+}
+
+/**
+ * Downloads a document file into a temporary location.
+ *
+ * The queue row(s) for $file_id are updated with the outcome:
+ * - HTTP 200: 'filepath_pdf' is set to the temporary file and the status
+ *   becomes "downloaded".
+ * - HTTP 404: the status becomes "ERROR: 404 not found".
+ * - Any other code: the status becomes "ERROR: {httpCode}".
+ *
+ * When the download fails the temporary file is removed again. When no
+ * temporary file can be created the row is left untouched, so that a later
+ * cron run retries it.
+ *
+ * @param string $file_id
+ *   Id of the document file.
+ */
+function _os2web_cp_service_document_download($file_id) {
+  // URL the document is requested from, built on this site's base URL.
+  $url = $GLOBALS['base_url'] . '/?q=os2web/service/gf/v1/' . $file_id;
+
+  // Local file the response body is written to.
+  $tmpfname = tempnam(file_directory_temp(), "os2web_cp_document_");
+  $fp = ($tmpfname !== FALSE) ? fopen($tmpfname, 'w') : FALSE;
+  if ($fp === FALSE) {
+    watchdog('os2web_cp_service', 'Could not create a temporary file for document %file_id.', array('%file_id' => $file_id), WATCHDOG_ERROR);
+    if ($tmpfname !== FALSE && is_file($tmpfname)) {
+      unlink($tmpfname);
+    }
+    return;
+  }
+
+  $ch = curl_init($url);
+  curl_setopt($ch, CURLOPT_FILE, $fp);
+  curl_exec($ch);
+  $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
+  curl_close($ch);
+  fclose($fp);
+
+  if ($httpCode === 200) {
+    $fields = array(
+      'filepath_pdf' => $tmpfname,
+      'status' => 'downloaded',
+    );
+  }
+  else {
+    // Nothing usable was downloaded; do not leave the temporary file behind.
+    unlink($tmpfname);
+    $fields = array(
+      'status' => ($httpCode === 404) ? 'ERROR: 404 not found' : 'ERROR: ' . $httpCode,
+    );
+  }
+
+  db_update('os2web_cp_service_documents_conversion')
+    ->fields($fields)
+    ->condition('file_id', $file_id)
+    ->execute();
+}
+
+/**
+ * Converts the downloaded PDF into HTML using the pdf2htmlEX command.
+ *
+ * The queue row(s) for $file_id are updated with the outcome:
+ * - PDF file missing: 'filepath_pdf' and the status are reset to NULL, so
+ *   that the next cron run downloads the file again.
+ * - HTML file produced: 'filepath_html' is set to "{path_to_pdf}.html" and
+ *   the status becomes "converted".
+ * - No HTML file produced: the status becomes "ERROR: conversion failed".
+ *   Marking such a row "converted" would make the metadata stage send it
+ *   back for conversion on every cron run.
+ *
+ * @param string $file_id
+ *   Id of the document file.
+ * @param string $path_to_pdf
+ *   Path to the PDF version of the file.
+ */
+function _os2web_cp_service_document_convert($file_id, $path_to_pdf) {
+  if (!file_exists($path_to_pdf)) {
+    // Send the document back for downloading.
+    $fields = array(
+      'filepath_pdf' => NULL,
+      'status' => NULL,
+    );
+  }
+  else {
+    // Write the HTML next to the PDF under an explicit name, so the path
+    // stored in the table is the path the converter actually used.
+    $path_to_html = $path_to_pdf . '.html';
+    $command = 'pdf2htmlEX --dest-dir ' . escapeshellarg(dirname($path_to_pdf))
+      . ' ' . escapeshellarg($path_to_pdf)
+      . ' ' . escapeshellarg(basename($path_to_html));
+    shell_exec($command);
+
+    if (file_exists($path_to_html)) {
+      $fields = array(
+        'filepath_html' => $path_to_html,
+        'status' => 'converted',
+      );
+    }
+    else {
+      $fields = array(
+        'status' => 'ERROR: conversion failed',
+      );
+    }
+  }
+
+  db_update('os2web_cp_service_documents_conversion')
+    ->fields($fields)
+    ->condition('file_id', $file_id)
+    ->execute();
+}
+
+/**
+ * Appends the text of a converted document to the case search metadata.
+ *
+ * Reads the HTML file, strips style blocks, script blocks, tags and line
+ * breaks, and appends the remaining text to the
+ * field_os2web_cp_service_searchmt field of the case node.
+ *
+ * The queue row(s) for $file_id are updated with the outcome:
+ * - HTML file missing: 'filepath_html' is reset and the status becomes
+ *   "downloaded", so that the next cron run converts the file again.
+ * - Case node missing: the status becomes "ERROR: node not found" and the
+ *   temporary files are deleted, since the job cannot be completed.
+ * - HTML unreadable or not processable: the status becomes
+ *   "ERROR: text extraction failed". The temporary files are kept.
+ * - Success: the status becomes "done" and the temporary files are deleted.
+ *
+ * @param string $file_id
+ *   Id of the document file.
+ * @param int $case_nid
+ *   Nid of the case node whose metadata should be updated.
+ * @param string $path_to_pdf
+ *   Path to the PDF version of the file.
+ * @param string $path_to_html
+ *   Path to the HTML version of the file.
+ */
+function _os2web_cp_service_update_case_metadata($file_id, $case_nid, $path_to_pdf, $path_to_html) {
+  $remove_tmp_files = FALSE;
+
+  if (!file_exists($path_to_html)) {
+    // Send for reconverting; the PDF existence is checked in that step.
+    $fields = array(
+      'filepath_html' => NULL,
+      'status' => 'downloaded',
+    );
+  }
+  elseif (!($case_node = node_load($case_nid))) {
+    $fields = array('status' => 'ERROR: node not found');
+    $remove_tmp_files = TRUE;
+  }
+  else {
+    $text = file_get_contents($path_to_html);
+    if ($text !== FALSE) {
+      // Drop empty paragraphs, then style and script blocks with their
+      // content. preg_replace() returns NULL on a PCRE error; that must not
+      // be mistaken for an empty document.
+      $text = str_replace('<p>&nbsp;</p>', ' ', $text);
+      $text = preg_replace('#<style(.*?)>(.*?)</style>#is', ' ', $text);
+      if ($text !== NULL) {
+        $text = preg_replace('#<script(.*?)>(.*?)</script>#is', ' ', $text);
+      }
+    }
+
+    if ($text === FALSE || $text === NULL) {
+      $fields = array('status' => 'ERROR: text extraction failed');
+    }
+    else {
+      $text = str_replace(array("\r\n", "\n\r", "\n", "\r"), ' ', strip_tags($text));
+
+      // The field may still be empty; separate appended text with a space so
+      // that words of different documents do not run together.
+      $existing = isset($case_node->field_os2web_cp_service_searchmt[LANGUAGE_NONE][0]['value'])
+        ? $case_node->field_os2web_cp_service_searchmt[LANGUAGE_NONE][0]['value']
+        : '';
+      $case_node->field_os2web_cp_service_searchmt[LANGUAGE_NONE][0]['value'] = ($existing === '') ? $text : $existing . ' ' . $text;
+      node_save($case_node);
+
+      $fields = array('status' => 'done');
+      $remove_tmp_files = TRUE;
+    }
+  }
+
+  db_update('os2web_cp_service_documents_conversion')
+    ->fields($fields)
+    ->condition('file_id', $file_id)
+    ->execute();
+
+  if ($remove_tmp_files) {
+    // Temporary files cleanup; either file may already be gone.
+    foreach (array($path_to_html, $path_to_pdf) as $tmp_file) {
+      if ($tmp_file && is_file($tmp_file)) {
+        unlink($tmp_file);
+      }
+    }
+  }
+}
@@ -87,7 +87,7 @@ function os2web_cp_service_strongarm() {
   $strongarm->disabled = FALSE; /* Edit this to true to make a default strongarm disabled initially */
   $strongarm->api_version = 1;
   $strongarm->name = 'pathauto_punctuation_slash';
-  $strongarm->value = '2';
+  $strongarm->value = '0';
   $export['pathauto_punctuation_slash'] = $strongarm;
 
   return $export;