Skip to content

Commit 95eb0bd

Browse files
committed
Refactor sampling logic to address numerical instability as well as arithmetic overflow
1 parent 5e2ed5c commit 95eb0bd

26 files changed

+1113
-271
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Copyright (c) 2017 Scaleborn UG, www.scaleborn.com
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.scaleborn.elasticsearch.linreg.aggregation.stats;
18+
19+
/**
 * Placeholder class of the stats aggregation package; it currently declares no fields,
 * constructors or methods.
 *
 * NOTE(review): presumably meant to become the internal result type of the stats
 * aggregation - confirm against the rest of the project.
 *
 * Created by mbok on 21.03.17.
 */
22+
public class InternalStats {
23+
24+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright (c) 2017 Scaleborn UG, www.scaleborn.com
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.scaleborn.elasticsearch.linreg.aggregation.stats;
18+
19+
import org.scaleborn.linereg.statistics.Statistics;
20+
21+
/**
 * Interface that extends {@link Statistics} without declaring any additional members.
 *
 * NOTE(review): presumably the aggregation-side view of the regression statistics -
 * confirm against the implementations elsewhere in the project.
 *
 * Created by mbok on 21.03.17.
 */
24+
public interface Stats extends Statistics {
25+
26+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Copyright (c) 2017 Scaleborn UG, www.scaleborn.com
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.scaleborn.elasticsearch.linreg.aggregation.stats;
18+
19+
import java.io.IOException;
20+
import java.util.List;
21+
import org.elasticsearch.common.io.stream.StreamInput;
22+
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
23+
import org.elasticsearch.search.aggregations.AggregatorFactory;
24+
import org.elasticsearch.search.aggregations.InternalAggregation;
25+
import org.elasticsearch.search.aggregations.InternalAggregation.Type;
26+
import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric;
27+
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
28+
import org.elasticsearch.search.internal.SearchContext;
29+
import org.scaleborn.elasticsearch.linreg.aggregation.support.BaseAggregationBuilder;
30+
import org.scaleborn.elasticsearch.linreg.aggregation.support.BaseAggregatorFactory;
31+
32+
/**
33+
* Created by mbok on 21.03.17.
34+
*/
35+
public class StatsAggregationBuilder extends BaseAggregationBuilder<StatsAggregationBuilder> {
36+
37+
public static final String NAME = "linreg_stats";
38+
private static final InternalAggregation.Type TYPE = new InternalAggregation.Type(NAME);
39+
40+
protected StatsAggregationBuilder(final StreamInput in,
41+
final Type type) throws IOException {
42+
super(in, type);
43+
}
44+
45+
@Override
46+
protected BaseAggregatorFactory<?> innerBuild(final SearchContext context,
47+
final List<ValuesSourceConfig<Numeric>> featureConfigs,
48+
final ValuesSourceConfig<Numeric> responseConfig,
49+
final AggregatorFactory<?> parent, final Builder subFactoriesBuilder) throws IOException {
50+
return null;
51+
}
52+
53+
54+
@Override
55+
public String getWriteableName() {
56+
return NAME;
57+
}
58+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright (c) 2017 Scaleborn UG, www.scaleborn.com
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.scaleborn.elasticsearch.linreg.aggregation.stats;
18+
19+
import java.io.IOException;
20+
import java.util.List;
21+
import java.util.Map;
22+
import org.apache.lucene.index.LeafReaderContext;
23+
import org.elasticsearch.common.util.ObjectArray;
24+
import org.elasticsearch.search.aggregations.Aggregator;
25+
import org.elasticsearch.search.aggregations.InternalAggregation;
26+
import org.elasticsearch.search.aggregations.LeafBucketCollector;
27+
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
28+
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
29+
import org.elasticsearch.search.internal.SearchContext;
30+
import org.scaleborn.linereg.sampling.Sampler;
31+
32+
/**
33+
* Created by mbok on 21.03.17.
34+
*/
35+
public class StatsAggregator extends NumericMetricsAggregator.MultiValue {
36+
37+
protected ObjectArray<Sampler<?>> samplers;
38+
39+
public StatsAggregator(final String name, final SearchContext context,
40+
final Aggregator parent,
41+
final List<PipelineAggregator> pipelineAggregators,
42+
final Map<String, Object> metaData,
43+
final ObjectArray<Sampler<?>> samples) throws IOException {
44+
super(name, context, parent, pipelineAggregators, metaData);
45+
this.samplers = samplers;
46+
}
47+
48+
@Override
49+
public boolean hasMetric(final String name) {
50+
return false;
51+
}
52+
53+
@Override
54+
public double metric(final String name, final long owningBucketOrd) {
55+
return 0;
56+
}
57+
58+
@Override
59+
protected LeafBucketCollector getLeafCollector(final LeafReaderContext ctx,
60+
final LeafBucketCollector sub)
61+
throws IOException {
62+
return null;
63+
}
64+
65+
@Override
66+
public InternalAggregation buildAggregation(final long bucket) throws IOException {
67+
return null;
68+
}
69+
70+
@Override
71+
public InternalAggregation buildEmptyAggregation() {
72+
return null;
73+
}
74+
}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
/*
2+
* Copyright (c) 2017 Scaleborn UG, www.scaleborn.com
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.scaleborn.elasticsearch.linreg.aggregation.support;
18+
19+
import java.io.IOException;
20+
import java.util.ArrayList;
21+
import java.util.Arrays;
22+
import java.util.List;
23+
import java.util.Objects;
24+
import org.elasticsearch.common.io.stream.StreamInput;
25+
import org.elasticsearch.common.io.stream.StreamOutput;
26+
import org.elasticsearch.common.xcontent.XContentBuilder;
27+
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
28+
import org.elasticsearch.search.aggregations.AggregatorFactories;
29+
import org.elasticsearch.search.aggregations.AggregatorFactory;
30+
import org.elasticsearch.search.aggregations.InternalAggregation.Type;
31+
import org.elasticsearch.search.aggregations.support.ValueType;
32+
import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric;
33+
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
34+
import org.elasticsearch.search.internal.SearchContext;
35+
36+
/**
37+
* Created by mbok on 22.03.17.
38+
*/
39+
public abstract class BaseAggregationBuilder<AB extends BaseAggregationBuilder<AB>> extends
40+
AbstractAggregationBuilder<AB> {
41+
42+
private List<String> featureFields;
43+
private String responseField;
44+
45+
public BaseAggregationBuilder(final String name,
46+
final Type type) {
47+
super(name, type);
48+
}
49+
50+
protected BaseAggregationBuilder(final StreamInput in,
51+
final Type type) throws IOException {
52+
super(in, type);
53+
read(in);
54+
}
55+
56+
/**
57+
* Read from a stream.
58+
*/
59+
private void read(StreamInput in) throws IOException {
60+
featureFields = Arrays.asList(in.readStringArray());
61+
responseField = in.readString();
62+
}
63+
64+
@Override
65+
protected void doWriteTo(final StreamOutput out) throws IOException {
66+
out.writeStringArray(featureFields.toArray(new String[featureFields.size()]));
67+
out.writeString(responseField);
68+
innerWriteTo(out);
69+
}
70+
71+
/**
72+
* Write subclass's state to the stream if required.
73+
*
74+
* @param out the output stream
75+
*/
76+
protected void innerWriteTo(StreamOutput out) throws IOException {
77+
// NOP
78+
}
79+
80+
81+
@Override
82+
protected final BaseAggregatorFactory<?> doBuild(SearchContext context,
83+
AggregatorFactory<?> parent,
84+
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
85+
List<ValuesSourceConfig<Numeric>> featureConfigs = new ArrayList<>(featureFields.size());
86+
for (String featureField : featureFields) {
87+
featureConfigs.add(ValuesSourceConfig.resolve(context.getQueryShardContext(),
88+
ValueType.NUMERIC, featureField, null, null, null, null));
89+
}
90+
ValuesSourceConfig<Numeric> responseConfig = ValuesSourceConfig
91+
.resolve(context.getQueryShardContext(),
92+
ValueType.NUMERIC, responseField, null, null, null, null);
93+
BaseAggregatorFactory<?> factory = innerBuild(context,
94+
featureConfigs,
95+
responseConfig, parent,
96+
subFactoriesBuilder);
97+
return factory;
98+
}
99+
100+
101+
protected abstract BaseAggregatorFactory<?> innerBuild(SearchContext context,
102+
List<ValuesSourceConfig<Numeric>> featureConfigs, ValuesSourceConfig<Numeric> responseConfig,
103+
AggregatorFactory<?> parent, AggregatorFactories.Builder subFactoriesBuilder)
104+
throws IOException;
105+
106+
107+
@Override
108+
public final XContentBuilder internalXContent(XContentBuilder builder, Params params)
109+
throws IOException {
110+
builder.startObject();
111+
if (featureFields != null) {
112+
builder.field("feature_fields", featureFields);
113+
}
114+
if (responseField != null) {
115+
builder.field("response_field", responseField);
116+
}
117+
doXContentBody(builder, params);
118+
builder.endObject();
119+
return builder;
120+
}
121+
122+
/**
123+
* Override in sub classes if required.
124+
*/
125+
protected XContentBuilder doXContentBody(XContentBuilder builder, Params params)
126+
throws IOException {
127+
return builder;
128+
}
129+
130+
@Override
131+
protected final int doHashCode() {
132+
return Objects.hash(featureFields, responseField, innerHashCode());
133+
}
134+
135+
/**
136+
* Override in sub classes to include further attributes.
137+
*/
138+
protected int innerHashCode() {
139+
return 0;
140+
}
141+
142+
@Override
143+
protected boolean doEquals(final Object obj) {
144+
BaseAggregationBuilder other = (BaseAggregationBuilder) obj;
145+
if (!Objects.equals(featureFields, other.featureFields)) {
146+
return false;
147+
}
148+
if (!Objects.equals(responseField, other.responseField)) {
149+
return false;
150+
}
151+
return innerEquals(obj);
152+
}
153+
154+
/**
155+
* Override in sub classes to include further attributes.
156+
*/
157+
protected boolean innerEquals(Object obj) {
158+
return true;
159+
}
160+
161+
}

0 commit comments

Comments
 (0)