+ Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
+ field.setOmitNorms(true);
+ field.setOmitTermFreqAndPositions(true);
+ document.add(field);
+
+
+ For optimal performance, re-use the TokenStream and Field instance
+ for more than one document:
+
+
+ NumericTokenStream stream = new NumericTokenStream(precisionStep);
+ Field field = new Field(name, stream);
+ field.setOmitNorms(true);
+ field.setOmitTermFreqAndPositions(true);
+ Document document = new Document();
+ document.add(field);
+
+ for(all documents) {
+ stream.setIntValue(value);
+ writer.addDocument(document);
+ }
+
+
+ This stream is not intended to be used in analyzers;
+ it's more for iterating the different precisions when
+ indexing a specific numeric value.
+
+ NOTE: as token streams are only consumed once
+ the document is added to the index, if you index more
+ than one numeric field, use a separate
+ NumericTokenStream instance for each.
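+
+ For example (a minimal sketch; the field names "price" and "count"
+ are illustrative, not from the original text):
+
+ NumericTokenStream priceStream = new NumericTokenStream(precisionStep);
+ Field priceField = new Field("price", priceStream);
+ NumericTokenStream countStream = new NumericTokenStream(precisionStep);
+ Field countField = new Field("count", countStream);
+ document.add(priceField);
+ document.add(countField);
+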
+ PerFieldAnalyzerWrapper aWrapper =
+ new PerFieldAnalyzerWrapper(new StandardAnalyzer());
+ aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
+ aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
+
+
+ In this example, StandardAnalyzer will be used for all fields except "firstname"
+ and "lastname", for which KeywordAnalyzer will be used.
+
+ A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+ and query parsing.
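+
+ For example (a sketch; "content" is an illustrative default field):
+
+ QueryParser parser = new QueryParser("content", aWrapper);
+ Query query = parser.parse("firstname:james lastname:jones");
+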
+
+ class MyAnalyzer extends Analyzer {
+ public final TokenStream tokenStream(String fieldName, Reader reader) {
+ return new PorterStemFilter(new LowerCaseTokenizer(reader));
+ }
+ }
+
+
+ TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
+ TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+ TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+ TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
+ source2.addSinkTokenStream(sink1);
+ source2.addSinkTokenStream(sink2);
+ TokenStream final1 = new LowerCaseFilter(source1);
+ TokenStream final2 = source2;
+ TokenStream final3 = new EntityDetect(sink1);
+ TokenStream final4 = new URLDetect(sink2);
+ d.add(new Field("f1", final1));
+ d.add(new Field("f2", final2));
+ d.add(new Field("f3", final3));
+ d.add(new Field("f4", final4));
+
+ In this example, sink1 and sink2 will both get tokens from both
+ readers after whitespace tokenization, and the sources can be
+ further wrapped before the fields are added. It is important that
+ the tees are consumed before the sinks; if you are not sure which
+ stream is consumed first, you can pass all tokens to the sinks up
+ front using consumeAllTokens(), changing the example to:
+ TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream());
+ TokenStream final2 = source2.newSinkTokenStream();
+ sink1.consumeAllTokens();
+ sink2.consumeAllTokens();
+ ...
+
+ In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
+ Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
+
+ SinkTokenizer sink1 = new SinkTokenizer();
+ SinkTokenizer sink2 = new SinkTokenizer();
+ TokenStream source1 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader1), sink1), sink2);
+ TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader2), sink1), sink2);
+ TokenStream final1 = new LowerCaseFilter(source1);
+ TokenStream final2 = source2;
+ TokenStream final3 = new EntityDetect(sink1);
+ TokenStream final4 = new URLDetect(sink2);
+ d.add(new Field("f1", final1));
+ d.add(new Field("f2", final2));
+ d.add(new Field("f3", final3));
+ d.add(new Field("f4", final4));
+
+ In this example, sink1 and sink2 will both get tokens from both
+ readers, as above.
+ return reusableToken.reinit(string, startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
+
+
+ public String toString() {
+ return "start=" + startOffset + ",end=" + endOffset;
+ }
+
+
+ This method may be overridden by subclasses.
+
+ public int hashCode() {
+ int code = startOffset;
+ code = code * 31 + endOffset;
+ return code;
+ }
+
+
+ See also NumericField, which wraps this stream as a field:
+ document.add(new NumericField(name).setIntValue(value));
+
+
+ For optimal performance, re-use the NumericField and Document
+ instance for more than one document:
+
+ NumericField field = new NumericField(name);
+ Document document = new Document();
+ document.add(field);
+
+ for(all documents) {
+ ...
+ field.setIntValue(value);
+ writer.addDocument(document);
+ ...
+ }
+
+
+ The .Net native types
+ boolean skipTo(int target) {
+ do {
+ if (!next())
+ return false;
+ } while (target > doc());
+ return true;
+ }
+
+ Some implementations are considerably more efficient than that.
+
+ java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+
+
+ IndexReader reader = ...
+ ...
+ IndexReader newReader = reader.reopen();
+ if (newReader != reader) {
+ ... // reader was reopened
+ reader.close();
+ }
+ reader = newReader;
+ ...
+
+
+ Be sure to synchronize that code so that other threads,
+ if present, can never use reader after it has been
+ closed and before it's switched to newReader.
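+
+ A minimal sketch of one such guard (assuming the reader variable is
+ the shared instance from the example above):
+
+ synchronized (this) {
+   IndexReader newReader = reader.reopen();
+   if (newReader != reader) {
+     reader.close();
+     reader = newReader;
+   }
+ }
+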
+
+ NOTE: If this reader is a near real-time
+ reader (obtained from IndexWriter.getReader()),
+ reopen() will simply call that method again for you.
+ public boolean skipTo(Term target) {
+ do {
+ if (!next())
+ return false;
+ } while (target.compareTo(term()) > 0);
+ return true;
+ }
+
+ Some implementations *could* be considerably more efficient than a linear scan.
+ Check the implementation to be sure.
+
+
+ Analyzer analyzer = new StandardAnalyzer();
+
+
+ try {
+ writer.close();
+ } finally {
+ if (IndexWriter.isLocked(directory)) {
+ IndexWriter.unlock(directory);
+ }
+ }
+
+
+ after which, you must be certain not to use the writer
+ instance anymore.
+
+ NOTE: if this method hits an OutOfMemoryError
+ you should immediately close the writer, again. See above for details.
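+
+ A sketch of that pattern (hedged; any sequence that retries the
+ close and then abandons the instance will do):
+
+ try {
+   writer.close();
+ } catch (OutOfMemoryError oom) {
+   writer.close();  // close again to release resources, then give up the instance
+   throw oom;
+ }
+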
+
+
+ // MyQueue overrides getSentinelObject() to return a non-null value.
+ PriorityQueue pq = new MyQueue(numHits);
+ // save the 'top' element, which is guaranteed to not be null.
+ MyObject pqTop = (MyObject) pq.top();
+ <...>
+ // now in order to add a new element, which is 'better' than top (after
+ // you've verified it is better), it is as simple as:
+ pqTop.change();
+ pqTop = pq.updateTop();
+
+
+ NOTE: if this method returns a non-null value, it will be called by
+ initialize(int) size times, relying on a new object to be returned
+ each time; it will not check for null again.
+
+ pq.top().change();
+ pq.adjustTop();
+
+
+ instead of
+
+
+ o = pq.pop();
+ o.change();
+ pq.push(o);
+
+
+
+ pq.top().change();
+ pq.updateTop();
+
+
+ instead of
+
+
+ o = pq.pop();
+ o.change();
+ pq.push(o);
+
+
+
+ Query ::= ( Clause )*
+ Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+
+
+
+ Examples of appropriately formatted queries can be found in the query syntax
+ documentation.
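+
+ For example (a sketch; the field names and terms are illustrative):
+
+ QueryParser parser = new QueryParser("body", analyzer);
+ // '+' marks a required clause, '-' a prohibited one, "title:" selects
+ // a field, and parentheses group a nested Query, per the grammar above.
+ Query q = parser.parse("+title:(quick brown) -body:fox");
+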
+
+
+
+ If the query consists of two terms and you specify the two fields
+ title and body, parse(String query) will construct a query like this:
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ When you pass a boost (title=>5 body=>10) you can get
+
+
+
+ +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
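+
+ For example, boosts can be supplied as a map from field name to
+ boost value (a sketch; the names and values are illustrative):
+
+ Map boosts = new HashMap();
+ boosts.put("title", new Float(5));
+ boosts.put("body", new Float(10));
+ String[] fields = {"title", "body"};
+ MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer, boosts);
+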
+
+
+
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ When you pass a boost (title=>5 body=>10) you can get
+
+
+
+ +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+
+
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+
+ (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+
+
+
+ (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+
+
+
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+
+
+ The code above would construct a query:
+
+ (filename:query) +(contents:query) -(description:query)
+
+
+
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+
+
+ The code above would construct a query:
+
+
+ (filename:query) +(contents:query) -(description:query)
+
+
+
+ String[] query = {"query1", "query2", "query3"};
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+
+
+ The code above would construct a query:
+
+ (filename:query1) +(contents:query2) -(description:query3)
+
+
+
+ String[] query = {"query1", "query2", "query3"};
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+
+
+ The code above would construct a query:
+
+
+ (filename:query1) +(contents:query2) -(description:query3)
+
+
+
+
+ tf(t in d) = frequency½
+
+ idf(t) = 1 + log( numDocs / (docFreq + 1) )
+
+ queryNorm(q) = 1 / sumOfSquaredWeights½
+
+ sumOfSquaredWeights = q.getBoost()² · ∑ ( idf(t) · t.getBoost() )²    (summed over each term t in q)
+
+ norm(t,d) = doc.getBoost() · lengthNorm(field) · ∏ f.getBoost()    (product over each field f in d named as t)
+
+ idf(searcher.docFreq(term), searcher.maxDoc());
+
+
+ Note that the default implementation behaves as if written:
+ public int nextDoc() throws IOException {
+ return next() ? doc() : NO_MORE_DOCS;
+ }
+
+
+ NOTE: after the iterator has been exhausted you should not call this
+ method, as it may result in unpredictable behavior.
+
+
+ int advance(int target) {
+ int doc;
+ while ((doc = nextDoc()) < target) {
+ }
+ return doc;
+ }
+
+
+ Some implementations are considerably more efficient than that.
+
+ NOTE: certain implementations may return a different value (each
+ time) if called several times in a row with the same target.
+
+ NOTE: this method may be called with NO_MORE_DOCS for
+ efficiency by some Scorers.
+ Searcher searcher = new IndexSearcher(indexReader);
+ final BitSet bits = new BitSet(indexReader.maxDoc());
+ searcher.search(query, new Collector() {
+ private int docBase;
+
+ // ignore scorer
+ public void setScorer(Scorer scorer) {
+ }
+
+ // accept docs out of order (for a BitSet it doesn't matter)
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+
+ public void collect(int doc) {
+ bits.set(doc + docBase);
+ }
+
+ public void setNextReader(IndexReader reader, int docBase) {
+ this.docBase = docBase;
+ }
+ });
+
+
+ Not all collectors will need to rebase the docID. For
+ example, a collector that simply counts the total number
+ of hits would skip it.
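+
+ A minimal sketch of such a counting collector (a hypothetical class
+ using the same Collector API as above):
+
+ public class CountingCollector extends Collector {
+   int count;                                          // total hits seen
+   public void setScorer(Scorer scorer) {}             // scores not needed
+   public boolean acceptsDocsOutOfOrder() { return true; }
+   public void collect(int doc) { count++; }           // no docBase rebase needed
+   public void setNextReader(IndexReader reader, int docBase) {}
+ }
+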
+
+ NOTE: Prior to 2.9, Lucene silently filtered
+ out hits with score <= 0. As of 2.9, the core Collectors
+ no longer do that. It's very unusual to have such hits
+ (a negative query boost, or a function query returning
+ negative custom scores, could cause it to happen). If
+ you need that behavior, use PositiveScoresOnlyCollector.
+
+ ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
+
+ ModifiedScore = subQueryScore * valSrcScore
+ similarity = 1 - ((float) distance / (float) (prefixLength + Math.min(textlen, targetlen)));
+ return (similarity > minimumSimilarity);
+
+ where distance is the Levenshtein distance for the two words.
+
+ Levenshtein distance (also known as edit distance) is a measure of similarity
+ between two strings, where the distance is measured as the number of character
+ deletions, insertions or substitutions required to transform one string into
+ the other string.
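+
+ A worked example (illustrative values, not from the library):
+
+ int distance = 3;  // Levenshtein("kitten", "sitting"): 2 substitutions + 1 insertion
+ int prefixLength = 0, textlen = 6, targetlen = 7;
+ float similarity = 1 - ((float) distance / (float) (prefixLength + Math.min(textlen, targetlen)));
+ // similarity == 0.5, so the term matches only when minimumSimilarity < 0.5
+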
+
+ Searcher searcher = new IndexSearcher(indexReader);
+ final BitSet bits = new BitSet(indexReader.maxDoc());
+ searcher.search(query, new HitCollector() {
+ public void collect(int doc, float score) {
+ bits.set(doc);
+ }
+ });
+
+
+ Note: This is called in an inner search loop. For good search
+ performance, implementations of this method should not call
+ Searcher.doc(int) or IndexReader.document(int) on every hit.
+ Doing so can slow searches by an order of magnitude or more.
+
+ PriorityQueue pq = new HitQueue(10, true); // pre-populate.
+ ScoreDoc top = pq.top();
+
+ // Add/Update one element.
+ top.score = 1.0f;
+ top.doc = 0;
+ top = (ScoreDoc) pq.updateTop();
+ int totalHits = 1;
+
+ // Now pop only the elements that were *truly* inserted.
+ // First, pop all the sentinel elements (there are pq.size() - totalHits).
+ for (int i = pq.size() - totalHits; i > 0; i--) pq.pop();
+
+ // Now pop the truly added elements.
+ ScoreDoc[] results = new ScoreDoc[totalHits];
+ for (int i = totalHits - 1; i >= 0; i--) {
+ results[i] = (ScoreDoc) pq.pop();
+ }
+
+
+ NOTE: This class pre-allocates a full array of
+ length size.
+ TopDocs topDocs = searcher.Search(query, numHits);
+ ScoreDoc[] hits = topDocs.scoreDocs;
+ for (int i = 0; i < hits.Length; i++) {
+   int docId = hits[i].doc;
+   Document d = searcher.Doc(docId);
+   // do something with current hit
+   ...
+ }
+
+
+ Filter f = NumericRangeFilter.newFloatRange("weight",
+                                             new Float(0.03f), new Float(0.10f),
+                                             true, true);
+
+
+ accepts all documents whose float valued "weight" field
+ ranges from 0.03 to 0.10, inclusive.
+ See NumericRangeQuery for details on how Lucene indexes
+ and searches numeric valued fields.
+ Query q = NumericRangeQuery.newFloatRange("weight",
+                                           new Float(0.03f), new Float(0.10f),
+                                           true, true);
+
+
+ matches all documents whose float valued "weight" field
+ ranges from 0.03 to 0.10, inclusive.
+
+ The performance of NumericRangeQuery is much better
+ than the corresponding (deprecated) RangeQuery, because the
+ number of terms that must be searched is usually far fewer.
+ The approach is described in:
+
+ Schindler, U, Diepenbroek, M, 2008. Generic XML-based Framework for
+ Metadata Portals. Computers & Geosciences 34 (12), 1947-1955.
+ doi:10.1016/j.cageo.2008.02.023
+
+ A quote from this paper: Because Apache Lucene is a full-text
+ search engine and not a conventional database, it cannot handle numerical ranges
+ (e.g., field value is inside user defined bounds, even dates are numerical values).
+ We have developed an extension to Apache Lucene that stores
+ the numerical values in a special string-encoded format with variable precision
+ (all numerical values like doubles, longs, floats, and ints are converted to
+ lexicographic sortable string representations and stored with different precisions).
+ For a more detailed description of how the values are stored,
+ see NumericUtils.
+ n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 )
+
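+
+ For example (assumed values, not from the text): a 32-bit int field
+ indexed with precisionStep = 4 yields at most
+
+ n = [ (32/4 - 1) * (2^4 - 1) * 2 ] + (2^4 - 1) = [ 7 * 15 * 2 ] + 15 = 225
+
+ terms per range query.
+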
+ (this formula is only correct when bitsPerValue/precisionStep is an integer)
+
+ Given two example documents:
+
+   teacherid: 1
+   studentfirstname: james
+   studentsurname: jones
+
+   teacherid: 2
+   studentfirstname: james
+   studentsurname: smith
+   studentfirstname: sally
+   studentsurname: jones
+
+ a SpanNearQuery with a slop of 0 can be applied across two
+ SpanTermQuery objects as follows:
+ SpanQuery q1 = new SpanTermQuery(new Term("studentfirstname", "james"));
+ SpanQuery q2 = new SpanTermQuery(new Term("studentsurname", "jones"));
+ SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");
+ Query q = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false);
+
+ to search for 'studentfirstname:james studentsurname:jones' and find
+ teacherid 1 without matching teacherid 2 (which has a 'james' in position 0
+ and 'jones' in position 1).
+
+ Note: as getField() returns the masked field, scoring is done using
+ the norms of the field name supplied, not the real field, which may
+ lead to unexpected scoring behavior.
+ boolean skipTo(int target) {
+ do {
+ if (!next())
+ return false;
+ } while (target > doc());
+ return true;
+ }
+
+ Most implementations are considerably more efficient than that.
+
+ new Lock.With(directory.makeLock("my.lock")) {
+ public Object doBody() {
+ ... code to execute while locked ...
+ }
+ }.run();
+
+
+
+ |          | cardinality | intersect_count | union | nextSetBit | get  | iterator |
+ |----------|-------------|-----------------|-------|------------|------|----------|
+ | 50% full | 3.36        | 3.96            | 1.44  | 1.46       | 1.99 | 1.58     |
+ | 1% full  | 3.31        | 3.90            |       | 1.04       |      | 0.99     |
+
+ |          | cardinality | intersect_count | union | nextSetBit | get  | iterator |
+ |----------|-------------|-----------------|-------|------------|------|----------|
+ | 50% full | 2.50        | 3.50            | 1.00  | 1.03       | 1.12 | 1.25     |
+ | 1% full  | 2.51        | 3.49            |       | 1.00       |      | 1.02     |
+
+ /*...*/ containing the specified text.
+
+ Info will exclude Verbose messages and include Info,
+ Warning and Error messages.
+ var p = GetProperty(alias); return p == null ? null : p.Value;
and nothing else.
+ var types = PluginManager.Current.ResolveTypes<PublishedContentModel>();
+ var factory = new PublishedContentModelFactoryImpl(types);
+ PublishedContentModelFactoryResolver.Current.SetFactory(factory);
+
+ DisposableTimer implements IDisposable and is intended for use in a
+ using (C#) statement.
+
+
+ using (DisposableTimer.TraceDuration<MyType>("starting", "finished"))
+ {
+ Thread.Sleep(567);
+ }
+
+ Console.WriteLine("Testing Stopwatchdisposable, should be 567:");
+ using (var timer = new DisposableTimer(result => Console.WriteLine("Took {0}ms", result)))
+ {
+ Thread.Sleep(567);
+ }
+
+
+
+
+
+
var p = GetProperty(alias); return p == null ? null : p.Value;
and nothing else.
+
+ [standard umbraco node Xml]
+
+
+