File tree Expand file tree Collapse file tree 8 files changed +14
-16
lines changed
vortex-layout/src/layouts Expand file tree Collapse file tree 8 files changed +14
-16
lines changed Original file line number Diff line number Diff line change @@ -22,7 +22,6 @@ permissions:
2222 contents : read
2323 pull-requests : write # for commenting on PRs
2424 id-token : write # enables AWS-GitHub OIDC
25- deployments : write # for Polar Signals profiling
2625
2726jobs :
2827 label_trigger :
Original file line number Diff line number Diff line change @@ -10,7 +10,6 @@ permissions:
1010 id-token : write # enables AWS-GitHub OIDC
1111 actions : read
1212 contents : write
13- deployments : write
1413
1514jobs :
1615 commit-metadata :
Original file line number Diff line number Diff line change 4848 path : docs/_build/html
4949
5050 deploy :
51+ permissions :
52+ deployments : write
5153 environment :
5254 name : github-pages
5355 url : ${{ steps.deployment.outputs.page_url }}
Original file line number Diff line number Diff line change @@ -15,7 +15,6 @@ permissions:
1515 id-token : write # enables AWS-GitHub OIDC
1616 actions : read
1717 contents : write
18- deployments : write
1918
2019jobs :
2120 sql :
Original file line number Diff line number Diff line change 4646 timeout-minutes : 120
4747 permissions :
4848 id-token : write # IMPORTANT: mandatory for trusted publishing
49+ deployments : write
4950 environment :
5051 name : push-to-pypi
5152 url : https://pypi.org/p/vortex-data
Original file line number Diff line number Diff line change @@ -17,7 +17,6 @@ permissions:
1717 contents : read
1818 pull-requests : write # for commenting on PRs
1919 id-token : write # enables AWS-GitHub OIDC
20- deployments : write # for Polar Signals profiling
2120
2221jobs :
2322 label_trigger :
Original file line number Diff line number Diff line change @@ -92,6 +92,7 @@ impl DictReader {
9292 // We capture the name, so it may be wrong if we re-use the same reader within multiple
9393 // different parent readers. But that's rare...
9494 let values_len = self . values_len ;
95+ let session = self . session . clone ( ) ;
9596 self . values_array
9697 . get_or_init ( move || {
9798 self . values
@@ -102,7 +103,15 @@ impl DictReader {
102103 )
103104 . vortex_expect ( "must construct dict values array evaluation" )
104105 . map_err ( Arc :: new)
105- . map_ok ( |arr| arr. to_canonical ( ) . into_array ( ) )
106+ . map ( move |array| {
107+ if * USE_VORTEX_OPERATORS {
108+ // We execute the array to avoid re-evaluating for every split.
109+ let array = array?;
110+ Ok ( array. execute_vector ( & session) ?. into_array ( array. dtype ( ) ) )
111+ } else {
112+ Ok ( array?. to_canonical ( ) . into_array ( ) )
113+ }
114+ } )
106115 . boxed ( )
107116 . shared ( )
108117 } )
Original file line number Diff line number Diff line change @@ -37,16 +37,6 @@ use crate::segments::SegmentSource;
3737// actual expression? Perhaps all expressions are given a selection mask to decide for themselves?
3838const EXPR_EVAL_THRESHOLD : f64 = 0.2 ;
3939
40- /// Below this mask density we will propagate filters one by one. In other words, we filter an
41- /// array using a mask prior to running a filter expression, and then have to perform a more
42- /// expensive rank intersection on the result. This threshold exists because filtering has a
43- /// non-trivial cost, and often that cost outweighs evaluating the filter expression over a few
44- /// more rows that are already known to be false.
45- ///
46- /// TODO(ngates): this threshold should really be estimated based on the cost of the filter +
47- /// the cost of the expression itself.
48- const FILTER_OF_FILTER_THRESHOLD : f64 = 0.8 ;
49-
5040pub struct FlatReader {
5141 layout : FlatLayout ,
5242 name : Arc < str > ,
@@ -157,7 +147,7 @@ impl LayoutReader for FlatReader {
157147 }
158148
159149 let array_mask = if * USE_VORTEX_OPERATORS {
160- if mask. density ( ) < FILTER_OF_FILTER_THRESHOLD {
150+ if mask. density ( ) < EXPR_EVAL_THRESHOLD {
161151 // We have the choice to apply the filter or the expression first, we apply the
162152 // expression first so that it can try pushing down itself and then the filter
163153 // after this.
You can’t perform that action at this time.
0 commit comments