Skip to content

Commit 4b8d58b

Browse files
committed
[WIP] - force push over me.
1 parent b881b3e commit 4b8d58b

File tree

21 files changed

+476
-25
lines changed

21 files changed

+476
-25
lines changed

internal/execution/queries/plan/access.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"github.com/efritz/gostgres/internal/execution/expressions"
66
"github.com/efritz/gostgres/internal/execution/queries/nodes"
77
"github.com/efritz/gostgres/internal/execution/queries/nodes/access"
8+
"github.com/efritz/gostgres/internal/execution/queries/plan/cost"
89
"github.com/efritz/gostgres/internal/shared/fields"
910
"github.com/efritz/gostgres/internal/shared/impls"
1011
)
@@ -59,8 +60,8 @@ func (n *logicalAccessNode) Optimize(ctx impls.OptimizationContext) {
5960
n.order = nil
6061
}
6162

62-
func (n *logicalAccessNode) EstimateCost() Cost {
63-
return Cost{} // TODO
63+
func (n *logicalAccessNode) EstimateCost() impls.NodeCost {
64+
return cost.ApplyFilterToCost(n.strategy.EstimateCost(), cost.EstimateFilterSelectivity(n.filter))
6465
}
6566

6667
func (n *logicalAccessNode) Filter() impls.Expression {
@@ -162,6 +163,17 @@ func (s *logicalTableAccessStrategy) Ordering() impls.OrderExpression {
162163
return nil
163164
}
164165

166+
var tableAccessCostPerRow = impls.ResourceCost{CPU: 0.01, IO: 0.1}
167+
168+
func (s *logicalTableAccessStrategy) EstimateCost() impls.NodeCost {
169+
stats, _ := s.table.Statistics()
170+
171+
return impls.NodeCost{
172+
EstimatedRows: stats.RowCount,
173+
VariableCost: tableAccessCostPerRow.ScaleUniform(float64(stats.RowCount)),
174+
}
175+
}
176+
165177
func (s *logicalTableAccessStrategy) Build() nodes.AccessStrategy {
166178
return access.NewTableAccessStrategy(s.table)
167179
}
@@ -186,6 +198,20 @@ func (s *logicalIndexAccessStrategy[O]) Filter() impls.Expression {
186198
return expressions.UnionFilters(append(expressions.Conjunctions(filterExpression), expressions.Conjunctions(condition)...)...)
187199
}
188200

201+
var indexAccessCostPerRow = impls.ResourceCost{CPU: 0.01, IO: 0.1}
202+
203+
func (s *logicalIndexAccessStrategy[O]) EstimateCost() impls.NodeCost {
204+
stats, _ := s.table.Statistics()
205+
206+
// TODO - support parital indexes
207+
// TODO - determine selectivity based on index cond
208+
209+
return impls.NodeCost{
210+
EstimatedRows: stats.RowCount,
211+
VariableCost: indexAccessCostPerRow.ScaleUniform(float64(stats.RowCount)),
212+
}
213+
}
214+
189215
func (s *logicalIndexAccessStrategy[O]) Build() nodes.AccessStrategy {
190216
return access.NewIndexAccessStrategy(s.table, s.index, s.opts, s.Filter())
191217
}

internal/execution/queries/plan/analyze.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ func (n *logicalAnalyze) Fields() []fields.Field
2121
func (n *logicalAnalyze) AddFilter(ctx impls.OptimizationContext, filter impls.Expression) {} // top-level
2222
func (n *logicalAnalyze) AddOrder(ctx impls.OptimizationContext, order impls.OrderExpression) {} // top-level
2323
func (n *logicalAnalyze) Optimize(ctx impls.OptimizationContext) {}
24-
func (n *logicalAnalyze) EstimateCost() Cost { return Cost{} }
24+
func (n *logicalAnalyze) EstimateCost() impls.NodeCost { return impls.NodeCost{} }
2525
func (n *logicalAnalyze) Filter() impls.Expression { return nil }
2626
func (n *logicalAnalyze) Ordering() impls.OrderExpression { return nil }
2727
func (n *logicalAnalyze) SupportsMarkRestore() bool { return false }
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package cost
2+
3+
import "github.com/efritz/gostgres/internal/shared/impls"
4+
5+
var (
6+
buildAggregateTableCostPerRow = impls.ResourceCost{CPU: 0.1}
7+
buildAggregateTableCostPerBucket = impls.ResourceCost{Memory: 0.1}
8+
)
9+
10+
func ApplyAggregationToCost(innerCost impls.NodeCost, estimatedNumBuckets int) impls.NodeCost {
11+
// Aggregation reads the entire inner relation on startup
12+
cost := MaterializeCost(innerCost)
13+
14+
// Add the cost of creating aggregation buckets and hashing all rows from the inner relation
15+
n := float64(innerCost.EstimatedRows)
16+
cost.FixedCost = cost.FixedCost.Add(buildAggregateTableCostPerRow.ScaleUniform(n))
17+
cost.FixedCost = cost.FixedCost.Add(buildAggregateTableCostPerBucket.ScaleUniform(float64(estimatedNumBuckets)))
18+
19+
// One row is projected for each bucket
20+
cost.EstimatedRows = estimatedNumBuckets
21+
cost = ApplyProjectionToCost(cost)
22+
23+
return cost
24+
}
25+
26+
// EstimateDistinctCount estimates how many distinct values occur among
// estimatedRows rows. It is currently a placeholder that returns its input
// unchanged.
func EstimateDistinctCount(estimatedRows int) int {
	// TODO - replace with a real estimate
	distinct := estimatedRows

	return distinct
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package cost
2+
3+
import "github.com/efritz/gostgres/internal/shared/impls"
4+
5+
var filterEvaluationCostPerRow = impls.ResourceCost{CPU: 0.01}
6+
7+
func ApplyFilterToCost(innerCost impls.NodeCost, filterSelectivity float64) impls.NodeCost {
8+
cost := innerCost
9+
10+
// Evaluate a filter expression for every row
11+
n := float64(innerCost.EstimatedRows)
12+
cost.VariableCost = cost.VariableCost.Add(filterEvaluationCostPerRow.ScaleUniform(n))
13+
14+
// Only rows selected by the filter are emitted
15+
cost.EstimatedRows = int(n * filterSelectivity)
16+
17+
return cost
18+
}
19+
20+
func EstimateFilterSelectivity(filter impls.Expression) float64 {
21+
if filter == nil {
22+
return 1.0
23+
}
24+
25+
return 1.0 // TODO - estimate
26+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package cost
2+
3+
// coalesce returns the value pointed to by value, or defaultValue when value
// is nil. It works for any type; callers passing a *int and an int default
// continue to compile unchanged via type inference.
func coalesce[T any](value *T, defaultValue T) T {
	if value == nil {
		return defaultValue
	}

	return *value
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package cost
2+
3+
import "github.com/efritz/gostgres/internal/shared/impls"
4+
5+
var (
6+
joinMergeCostPerRow = impls.ResourceCost{CPU: 0.2}
7+
joinFilterCostPerRow = impls.ResourceCost{CPU: 0.1}
8+
)
9+
10+
func EstimateNestedLoopJoinCost(
11+
leftCost impls.NodeCost,
12+
rightCost impls.NodeCost,
13+
joinSelectivity float64,
14+
hasCondition bool,
15+
) impls.NodeCost {
16+
estimatedLeftRows := float64(leftCost.EstimatedRows)
17+
estimatedRightRows := float64(rightCost.EstimatedRows)
18+
estimatedCandidateRows := estimatedLeftRows * estimatedRightRows
19+
estimatedResultRows := estimatedCandidateRows * joinSelectivity
20+
21+
costPerCandidateRow := joinMergeCostPerRow
22+
if hasCondition {
23+
costPerCandidateRow = costPerCandidateRow.Add(joinFilterCostPerRow)
24+
}
25+
26+
// On startup, we only initialize the left scanner
27+
fixedCost := leftCost.FixedCost
28+
29+
variableCost := impls.SumCosts(
30+
leftCost.VariableCost.ScaleUniform(estimatedLeftRows), // Cost to scan each row from left relation
31+
rightCost.FixedCost.ScaleUniform(estimatedLeftRows), // Cost to re-initialized right scanner for every row from left relation
32+
rightCost.VariableCost.ScaleUniform(estimatedCandidateRows), // Cost to scan each row from right relation
33+
costPerCandidateRow.ScaleUniform(estimatedCandidateRows), // Cost to merge row pairs and evaluate the join condition
34+
)
35+
36+
return impls.NodeCost{
37+
EstimatedRows: int(estimatedResultRows),
38+
FixedCost: fixedCost,
39+
VariableCost: variableCost,
40+
}
41+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package cost
2+
3+
import "github.com/efritz/gostgres/internal/shared/impls"
4+
5+
func AlterCostByLimitOffset(innerCost impls.NodeCost, limit, offset *int) impls.NodeCost {
6+
if limit == nil && offset == nil {
7+
return innerCost
8+
}
9+
10+
cost := innerCost
11+
12+
// Scale the variable cost of the inner relation by reading only limit + offset rows
13+
o := coalesce(offset, 0)
14+
l := coalesce(limit, innerCost.EstimatedRows-o)
15+
cost.VariableCost = cost.VariableCost.ScaleUniform(float64(l+o) / float64(innerCost.EstimatedRows))
16+
17+
// Adjust number of output rows; this may be less than limit + offset, so we may end up
18+
// "smearing" the variable cost of the inner relation over fewer rows of the outer relation.
19+
cost.EstimatedRows = l
20+
21+
return cost
22+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package cost
2+
3+
import "github.com/efritz/gostgres/internal/shared/impls"
4+
5+
func MaterializeCost(innerCost impls.NodeCost) impls.NodeCost {
6+
cost := innerCost
7+
8+
// We pay the variable cost of the inner relation at startup
9+
cost.FixedCost = cost.FixedCost.Add(innerCost.VariableCost)
10+
11+
// Reset the varaible cost so we're not double counting it
12+
cost.VariableCost = impls.ResourceCost{}
13+
14+
return cost
15+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package cost
2+
3+
import (
4+
"math"
5+
6+
"github.com/efritz/gostgres/internal/shared/impls"
7+
)
8+
9+
var (
10+
sortCostPerRow = impls.ResourceCost{Memory: 0.1}
11+
sortCostPerComparison = impls.ResourceCost{CPU: 0.1}
12+
)
13+
14+
func ApplySortToCost(innerCost impls.NodeCost) impls.NodeCost {
15+
// Sorting reads the entire inner relation on startup
16+
cost := MaterializeCost(innerCost)
17+
18+
// Add the cost of storing each row in-memory and performing the comparison sort
19+
n := float64(innerCost.EstimatedRows)
20+
cost.FixedCost = cost.FixedCost.Add(sortCostPerRow.ScaleUniform(n))
21+
cost.FixedCost = cost.FixedCost.Add(sortCostPerComparison.ScaleUniform(n * math.Log2(n)))
22+
23+
return cost
24+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package cost
2+
3+
import "github.com/efritz/gostgres/internal/shared/impls"
4+
5+
var projectionEvaluationCostPerRow = impls.ResourceCost{CPU: 0.01}
6+
7+
func ApplyProjectionToCost(innerCost impls.NodeCost) impls.NodeCost {
8+
cost := innerCost
9+
10+
// Evaluate a projection expression for every row
11+
n := float64(innerCost.EstimatedRows)
12+
cost.VariableCost = cost.VariableCost.Add(projectionEvaluationCostPerRow.ScaleUniform(n))
13+
14+
return cost
15+
}

0 commit comments

Comments
 (0)