@@ -1395,7 +1395,6 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
13951395 final Long datasetId = dataset .getId ();
13961396 final String datasetGlobalId = dataset .getGlobalId ().toString ();
13971397
1398- //Constants within loop:
13991398 AutoDetectParser autoParser = null ;
14001399 ParseContext context = null ;
14011400 if (doFullTextIndexing ) {
@@ -1423,9 +1422,9 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
14231422 String datasetVersionId = datasetVersion .getId ().toString ();
14241423 boolean indexThisMetadata = indexableDataset .isFilesShouldBeIndexed ();
14251424 String datasetPersistentURL = dataset .getPersistentURL ();
1426-
1425+ boolean isHarvested = dataset .isHarvested ();
1426+ long startTime = System .currentTimeMillis ();
14271427 for (FileMetadata fileMetadata : fileMetadatas ) {
1428- long startTime = System .currentTimeMillis ();
14291428 DataFile datafile = fileMetadata .getDataFile ();
14301429 LocalDate end = null ;
14311430 LocalDate start = null ;
@@ -1479,7 +1478,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
14791478 }
14801479 /* Full-text indexing using Apache Tika */
14811480 if (doFullTextIndexing ) {
1482- if (!dataset . isHarvested () && !fileMetadata . getDataFile () .isRestricted ()
1481+ if (!isHarvested && !datafile .isRestricted ()
14831482 && !datafile .isFilePackage ()
14841483 && datafile .getFilesize ()!=0
14851484 && datafile .getRetention () == null ) {
@@ -1496,7 +1495,8 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
14961495 // https://github.com/IQSS/dataverse/issues/5165), so we want to get a handle so
14971496 // we can close it below.
14981497 instream = accessObject .getInputStream ();
1499- if (accessObject .getSize () <= maxSize ) {
1498+ long size = accessObject .getSize ();
1499+ if ((size > 0 ) && (size <= maxSize )) {
15001500 textHandler = new BodyContentHandler (-1 );
15011501 Metadata metadata = new Metadata ();
15021502 /*
@@ -1518,7 +1518,6 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
15181518 e .printStackTrace ();
15191519 }
15201520 } catch (OutOfMemoryError e ) {
1521- textHandler = null ;
15221521 logger .warning (String .format ("Full-text indexing for %s failed due to OutOfMemoryError" ,
15231522 datafile .getDisplayName ()));
15241523 } catch (Error e ) {
@@ -1527,6 +1526,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
15271526 logger .severe (String .format ("Full-text indexing for %s failed due to Error: %s : %s" ,
15281527 datafile .getDisplayName (),e .getClass ().getCanonicalName (), e .getLocalizedMessage ()));
15291528 } finally {
1529+ textHandler = null ;
15301530 IOUtils .closeQuietly (instream );
15311531 }
15321532 }
@@ -1775,6 +1775,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
17751775 try {
17761776 solrClientIndexService .getSolrClient ().add (docs .getDocuments ());
17771777 } catch (SolrServerException | IOException ex ) {
1778+ logger .warning ("Check process-failures logs re: " + ex .getLocalizedMessage ());
17781779 if (ex .getCause () instanceof SolrServerException ) {
17791780 throw new SolrServerException (ex );
17801781 } else if (ex .getCause () instanceof IOException ) {
0 commit comments