Skip to content

Commit 958cd03

Browse files
author
Paul Johnson
authored
V9 bugfix: fix immense Lucene raw fields preventing indexing (#11599)
* Added a failing test to demonstrate the issue with large raw_ fields. * Switched to StoredField to avoid the indexing error for immense fields. StringField indexes all of the content as a single token, which has a maximum length of 32766 bytes. StoredField does not analyze or index the field, but still enables retrieval with luceneSearcher.Doc(docId). Closes GH #11487.
1 parent d147d7d commit 958cd03

File tree

3 files changed

+41
-6
lines changed

3 files changed

+41
-6
lines changed

src/Umbraco.Examine.Lucene/UmbracoExamineIndex.cs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
using Examine.Lucene;
99
using Examine.Lucene.Providers;
1010
using Lucene.Net.Documents;
11-
using Lucene.Net.Index;
12-
using Lucene.Net.Store;
1311
using Microsoft.Extensions.Logging;
1412
using Microsoft.Extensions.Options;
1513
using Umbraco.Cms.Core;
@@ -103,10 +101,7 @@ protected override void OnDocumentWriting(DocumentWritingEventArgs docArgs)
103101
//remove the original value so we can store it the correct way
104102
d.RemoveField(f.Key);
105103

106-
d.Add(new StringField(
107-
f.Key,
108-
f.Value[0].ToString(),
109-
Field.Store.YES));
104+
d.Add(new StoredField(f.Key, f.Value[0].ToString()));
110105
}
111106
}
112107

tests/Umbraco.Tests.Integration/Umbraco.Examine.Lucene/UmbracoExamine/IndexTest.cs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Linq;
4+
using Bogus;
45
using Examine;
6+
using Lucene.Net.Util;
57
using Newtonsoft.Json;
68
using NUnit.Framework;
79
using Umbraco.Cms.Core.Models;
@@ -47,6 +49,43 @@ public void GivenValidationParentNode_WhenContentIndexedUnderDifferentParent_Doc
4749
}
4850
}
4951

52+
// Regression test for GH #11487 (see the commit description): indexes a content item whose
// rich-text property holds more text than the Lucene StringField limit, then checks that the
// corresponding raw field is present in the search result with its length above that limit.
[Test]
53+
public void GivenIndexingDocument_WhenRichTextPropertyData_CanStoreImmenseFields()
54+
{
55+
using (GetSynchronousContentIndex(false, out UmbracoContentIndex index, out _, out ContentValueSetBuilder contentValueSetBuilder, null))
56+
{
57+
index.CreateIndex();
58+
59+
// Basic content type extended with a TinyMce property aliased "rte", stored as Ntext.
ContentType contentType = ContentTypeBuilder.CreateBasicContentType();
60+
contentType.AddPropertyType(new PropertyType(TestHelper.ShortStringHelper, "test", ValueStorageType.Ntext)
61+
{
62+
Alias = "rte",
63+
Name = "RichText",
64+
PropertyEditorAlias = Cms.Core.Constants.PropertyEditors.Aliases.TinyMce
65+
});
66+
67+
// Fixed id/path so the document can be queried by id 555 further down.
Content content = ContentBuilder.CreateBasicContent(contentType);
68+
content.Id = 555;
69+
content.Path = "-1,555";
70+
71+
// The commit description gives 32766 as the StringField maximum; here it is derived
// from Lucene's ByteBlockPool block size rather than hard-coded.
var luceneStringFieldMaxLength = ByteBlockPool.BYTE_BLOCK_SIZE - 2;
72+
var faker = new Faker();
73+
// Random string 10 characters longer than the limit.
// NOTE(review): the Faker is unseeded, so the text presumably differs between runs — confirm.
var immenseText = faker.Random.String(length: luceneStringFieldMaxLength + 10);
74+
75+
content.Properties["rte"].SetValue(immenseText);
76+
77+
// Build the value sets for the content item and push them into the index.
IEnumerable<ValueSet> valueSet = contentValueSetBuilder.GetValueSets(content);
78+
index.IndexItems(valueSet);
79+
80+
// Fetch the document by the id assigned above; First() throws if nothing was indexed.
ISearchResults results = index.Searcher.CreateQuery().Id(555).Execute();
81+
ISearchResult result = results.First();
82+
83+
// The field under test is the "rte" alias prefixed with RawFieldPrefix.
var key = $"{UmbracoExamineFieldNames.RawFieldPrefix}rte";
84+
Assert.IsTrue(result.Values.ContainsKey(key));
85+
// The returned value must be longer than the limit computed above.
Assert.Greater(result.Values[key].Length, luceneStringFieldMaxLength);
86+
}
87+
}
88+
5089
[Test]
5190
public void GivenIndexingDocument_WhenGridPropertyData_ThenDataIndexedInSegregatedFields()
5291
{

tests/Umbraco.Tests.Integration/Umbraco.Tests.Integration.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
</ItemGroup>
8181

8282
<ItemGroup>
83+
<PackageReference Include="Bogus" Version="33.1.1" />
8384
<PackageReference Include="Examine.Lucene" Version="2.0.1" />
8485
<PackageReference Include="Microsoft.AspNet.WebApi.Client" Version="5.2.7" />
8586
<PackageReference Include="Microsoft.AspNetCore.Mvc.Testing" Version="5.0.11" />

0 commit comments

Comments
 (0)