@@ -235,7 +235,7 @@ class Manifestation < ApplicationRecord
235235 end
236236 after_create :clear_cached_numdocs
237237 after_destroy :index_series_statement
238- after_save :index_series_statement , :extract_text!
238+ after_save :index_series_statement
239239 after_touch do |manifestation |
240240 manifestation . index
241241 manifestation . index_series_statement
@@ -367,24 +367,21 @@ def self.pickup(keyword = nil, current_user = nil)
367367 end
368368
# Extracts the plain text of the attached file by sending it to a Tika
# server and reading the "X-TIKA:content" entry of the JSON response.
#
# Extraction is opt-in: no request is made unless a file is attached and
# the ENJU_LEAF_EXTRACT_TEXT environment variable is exactly "true". The
# server address comes from TIKA_URL, falling back to http://tika:9998.
#
# @return [String, nil] the extracted text, stripped, with tabs replaced by
#   spaces and line breaks removed; nil when extraction is skipped or the
#   response contains no "X-TIKA:content" entry
# @raise [JSON::ParserError] if the response body is not valid JSON
def extract_text
  return unless attachment.attached?
  return unless ENV['ENJU_LEAF_EXTRACT_TEXT'] == 'true'

  client = Faraday.new(url: ENV['TIKA_URL'] || 'http://tika:9998') do |conn|
    conn.adapter :net_http
  end

  response = client.put('/tika/text') do |req|
    req.headers['Content-Type'] = attachment.content_type
    req.headers['Content-Length'] = attachment.byte_size.to_s
    # The file is already fully downloaded, so it is sent as the raw request
    # body; no multipart wrapper object is needed for a plain PUT.
    req.body = attachment.download
  end

  # The content entry may be missing from the response; return nil in that
  # case instead of failing on nil.strip.
  content = JSON.parse(response.body)['X-TIKA:content']
  content&.strip&.tr("\t", ' ')&.gsub(/\r?\n/, '')
end
389386
390387 def created ( agent )
0 commit comments