|
4 | 4 |
|
5 | 5 | namespace Nest |
6 | 6 | { |
/// <summary>
/// Describes how the terms returned by a term vectors request are filtered based on their TF-IDF scores.
/// This can be useful in order to find out a good characteristic vector of a document.
/// </summary>
[JsonObject(MemberSerialization.OptIn)]
public interface ITermVectorFilter
{
	/// <summary>
	/// The maximum number of terms that must be returned per field. Defaults to 25.
	/// Serialized as <c>max_num_terms</c>.
	/// </summary>
	[JsonProperty("max_num_terms")]
	int? MaximumNumberOfTerms { get; set; }

	/// <summary>
	/// Words with less than this frequency in the source doc are ignored. Defaults to 1.
	/// Serialized as <c>min_term_freq</c>.
	/// </summary>
	[JsonProperty("min_term_freq")]
	int? MinimumTermFrequency { get; set; }

	/// <summary>
	/// Words with more than this frequency in the source doc are ignored. Defaults to unbounded.
	/// Serialized as <c>max_term_freq</c>.
	/// </summary>
	[JsonProperty("max_term_freq")]
	int? MaximumTermFrequency { get; set; }

	/// <summary>
	/// Terms which do not occur in at least this many docs are ignored. Defaults to 1.
	/// Serialized as <c>min_doc_freq</c>.
	/// </summary>
	[JsonProperty("min_doc_freq")]
	int? MinimumDocumentFrequency { get; set; }

	/// <summary>
	/// Words which occur in more than this many docs are ignored. Defaults to unbounded.
	/// Serialized as <c>max_doc_freq</c>.
	/// </summary>
	[JsonProperty("max_doc_freq")]
	int? MaximumDocumentFrequency { get; set; }

	/// <summary>
	/// Words shorter than this minimum word length are ignored. Defaults to 0.
	/// Serialized as <c>min_word_length</c>.
	/// </summary>
	[JsonProperty("min_word_length")]
	int? MinimumWordLength { get; set; }

	/// <summary>
	/// Words longer than this maximum word length are ignored. Defaults to unbounded.
	/// Serialized as <c>max_word_length</c>.
	/// </summary>
	[JsonProperty("max_word_length")]
	int? MaximumWordLength { get; set; }
}
19 | 56 |
|
/// <summary>
/// Object-initializer form of <see cref="ITermVectorFilter"/>: filters the returned terms
/// based on their TF-IDF scores.
/// This can be useful in order to find out a good characteristic vector of a document.
/// </summary>
public class TermVectorFilter : ITermVectorFilter
{
	/// <summary>
	/// The maximum number of terms that must be returned per field. Defaults to 25.
	/// </summary>
	public int? MaximumNumberOfTerms { get; set; }

	/// <summary>
	/// Words with less than this frequency in the source doc are ignored. Defaults to 1.
	/// </summary>
	public int? MinimumTermFrequency { get; set; }

	/// <summary>
	/// Words with more than this frequency in the source doc are ignored. Defaults to unbounded.
	/// </summary>
	public int? MaximumTermFrequency { get; set; }

	/// <summary>
	/// Terms which do not occur in at least this many docs are ignored. Defaults to 1.
	/// </summary>
	public int? MinimumDocumentFrequency { get; set; }

	/// <summary>
	/// Words which occur in more than this many docs are ignored. Defaults to unbounded.
	/// </summary>
	public int? MaximumDocumentFrequency { get; set; }

	/// <summary>
	/// Words shorter than this minimum word length are ignored. Defaults to 0.
	/// </summary>
	public int? MinimumWordLength { get; set; }

	/// <summary>
	/// Words longer than this maximum word length are ignored. Defaults to unbounded.
	/// </summary>
	public int? MaximumWordLength { get; set; }
}
28 | 98 |
|
/// <summary>
/// Fluent descriptor for <see cref="ITermVectorFilter"/>: filters the returned terms
/// based on their TF-IDF scores.
/// This can be useful in order to find out a good characteristic vector of a document.
/// Each method stores its argument on the matching <see cref="ITermVectorFilter"/> property through <c>Assign</c>.
/// </summary>
public class TermVectorFilterDescriptor
	: DescriptorBase<TermVectorFilterDescriptor, ITermVectorFilter>, ITermVectorFilter
{
	// Explicit interface state; each member is declared exactly once.
	int? ITermVectorFilter.MaximumNumberOfTerms { get; set; }
	int? ITermVectorFilter.MinimumTermFrequency { get; set; }
	int? ITermVectorFilter.MaximumTermFrequency { get; set; }
	int? ITermVectorFilter.MinimumDocumentFrequency { get; set; }
	int? ITermVectorFilter.MaximumDocumentFrequency { get; set; }
	int? ITermVectorFilter.MinimumWordLength { get; set; }
	int? ITermVectorFilter.MaximumWordLength { get; set; }

	/// <summary>
	/// Maximum number of terms that must be returned per field. Defaults to 25.
	/// </summary>
	/// <param name="maxNumTerms">The value assigned to <see cref="ITermVectorFilter.MaximumNumberOfTerms"/>.</param>
	public TermVectorFilterDescriptor MaximumNumberOfTerms(int maxNumTerms) => Assign(a => a.MaximumNumberOfTerms = maxNumTerms);

	/// <summary>
	/// Misspelled alias of <see cref="MaximumNumberOfTerms(int)"/>, retained so that existing callers keep compiling.
	/// Maximum number of terms that must be returned per field. Defaults to 25.
	/// </summary>
	/// <param name="maxNumTerms">The value assigned to <see cref="ITermVectorFilter.MaximumNumberOfTerms"/>.</param>
	public TermVectorFilterDescriptor MaximimumNumberOfTerms(int maxNumTerms) => MaximumNumberOfTerms(maxNumTerms);

	/// <summary>
	/// Ignore words with less than this frequency in the source doc. Defaults to 1.
	/// </summary>
	/// <param name="minTermFreq">The value assigned to <see cref="ITermVectorFilter.MinimumTermFrequency"/>.</param>
	public TermVectorFilterDescriptor MinimumTermFrequency(int minTermFreq) => Assign(a => a.MinimumTermFrequency = minTermFreq);

	/// <summary>
	/// Ignore words with more than this frequency in the source doc. Defaults to unbounded.
	/// </summary>
	/// <param name="maxTermFreq">The value assigned to <see cref="ITermVectorFilter.MaximumTermFrequency"/>.</param>
	public TermVectorFilterDescriptor MaximumTermFrequency(int maxTermFreq) => Assign(a => a.MaximumTermFrequency = maxTermFreq);

	/// <summary>
	/// Ignore terms which do not occur in at least this many docs. Defaults to 1.
	/// </summary>
	/// <param name="minDocFreq">The value assigned to <see cref="ITermVectorFilter.MinimumDocumentFrequency"/>.</param>
	public TermVectorFilterDescriptor MinimumDocumentFrequency(int minDocFreq) => Assign(a => a.MinimumDocumentFrequency = minDocFreq);

	/// <summary>
	/// Ignore words which occur in more than this many docs. Defaults to unbounded.
	/// </summary>
	/// <param name="maxDocFreq">The value assigned to <see cref="ITermVectorFilter.MaximumDocumentFrequency"/>.</param>
	public TermVectorFilterDescriptor MaximumDocumentFrequency(int maxDocFreq) => Assign(a => a.MaximumDocumentFrequency = maxDocFreq);

	/// <summary>
	/// The minimum word length below which words will be ignored. Defaults to 0.
	/// </summary>
	/// <param name="minWordLength">The value assigned to <see cref="ITermVectorFilter.MinimumWordLength"/>.</param>
	public TermVectorFilterDescriptor MinimumWordLength(int minWordLength) => Assign(a => a.MinimumWordLength = minWordLength);

	/// <summary>
	/// The maximum word length above which words will be ignored. Defaults to unbounded.
	/// </summary>
	/// <param name="maxWordLength">The value assigned to <see cref="ITermVectorFilter.MaximumWordLength"/>.</param>
	public TermVectorFilterDescriptor MaximumWordLength(int maxWordLength) => Assign(a => a.MaximumWordLength = maxWordLength);
}
44 | 149 | } |
0 commit comments