Merge pull request #1854 from dolthub/fulghum/bugfix-2

fulghum · web-flow · commit 3a4afb2c595b · 2023-06-29T09:22:54.000-07:00
Prevent loops in stored procedures from returning multiple result sets
diff --git a/enginetest/queries/procedure_queries.go b/enginetest/queries/procedure_queries.go
@@ -22,6 +22,50 @@ import (
 )
 
 var ProcedureLogicTests = []ScriptTest{
+	{
+		// When a loop is executed once before the first evaluation of the loop condition, we expect the stored
+		// procedure to return the last result set from that first loop execution.
+		Name: "REPEAT with OnceBefore returns first loop evaluation result set",
+		SetUpScript: []string{
+			`CREATE PROCEDURE p1()
+	BEGIN
+	SET @counter = 0;
+	REPEAT
+		SELECT 42 from dual;
+		SET @counter = @counter + 1;
+	UNTIL @counter >= 0
+	END REPEAT;
+	END`,
+		},
+		Assertions: []ScriptTestAssertion{
+			{
+				Query:    "CALL p1;",
+				Expected: []sql.Row{{42}},
+			},
+		},
+	},
+	{
+		// When a loop condition evals to false, we expect the stored procedure to return the last
+		// result set from the previous loop execution.
+		Name: "WHILE returns previous loop evaluation result set",
+		SetUpScript: []string{
+			`CREATE PROCEDURE p1()
+	BEGIN
+	SET @counter = 0;
+	WHILE @counter <= 0 DO
+		SET @counter = @counter + 1;
+		SELECT CAST(@counter + 41 as SIGNED) from dual;
+	END WHILE;
+	END`,
+		},
+		Assertions: []ScriptTestAssertion{
+			{
+				Query:    "CALL p1;",
+				Expected: []sql.Row{{42}},
+			},
+		},
+	},
+
 	{
 		Name: "Simple SELECT",
 		SetUpScript: []string{
@@ -278,17 +322,25 @@ BEGIN
 END`,
 		},
 		Assertions: []ScriptTestAssertion{
+			// TODO: MySQL won't actually return *any* result set for these stored procedures. We have done work
+			//       to filter out all but the last result set generated by the stored procedure, but we still
+			//       need to filter out Result Sets that should be completely omitted.
 			{
 				Query:    "CALL p1(0)",
-				Expected: []sql.Row{},
+				Expected: []sql.Row{{}},
 			},
 			{
 				Query:    "CALL p1(1)",
-				Expected: []sql.Row{{}, {}}, // Next calls return an empty row, but progress the loop
+				Expected: []sql.Row{{}},
 			},
 			{
 				Query:    "CALL p1(2)",
-				Expected: []sql.Row{{}, {}, {}},
+				Expected: []sql.Row{{}},
+			},
+			{
+				// https://github.com/dolthub/dolt/issues/6230
+				Query:    "CALL p1(200)",
+				Expected: []sql.Row{{}},
 			},
 		},
 	},
@@ -304,17 +356,20 @@ BEGIN
 END`,
 		},
 		Assertions: []ScriptTestAssertion{
+			// TODO: MySQL won't actually return *any* result set for these stored procedures. We have done work
+			//       to filter out all but the last result set generated by the stored procedure, but we still
+			//       need to filter out Result Sets that should be completely omitted.
 			{
 				Query:    "CALL p1(0)",
 				Expected: []sql.Row{{}},
 			},
 			{
 				Query:    "CALL p1(1)",
-				Expected: []sql.Row{{}, {}},
+				Expected: []sql.Row{{}},
 			},
 			{
 				Query:    "CALL p1(2)",
-				Expected: []sql.Row{{}, {}, {}},
+				Expected: []sql.Row{{}},
 			},
 		},
 	},
diff --git a/sql/rowexec/proc.go b/sql/rowexec/proc.go
@@ -16,8 +16,8 @@ package rowexec
 
 import (
 	"fmt"
+	"io"
 	"strings"
-	"sync"
 
 	"github.com/dolthub/go-mysql-server/sql"
 	"github.com/dolthub/go-mysql-server/sql/expression"
@@ -194,27 +194,149 @@ func (b *BaseBuilder) buildCall(ctx *sql.Context, n *plan.Call, row sql.Row) (sq
 	}, nil
 }
 
+// buildLoop executes the loop |n| and returns an iterator over the single result set it produces. |row| is only
+// used when acquiring the iterator for the initial OnceBeforeEval pass. Note that because of how we execute stored
+// procedures and cache the results in order to only send back the LAST result set (instead of supporting multiple
+// result sets from stored procedures, like MySQL does), building the iterator here also implicitly means that
+// we're executing the loop logic and caching the result set in memory. This will obviously be an issue for very
+// large result sets. Unfortunately, we can't know at analysis time what the last result set returned will be, since
+// conditional logic in stored procedures can't be known until execution time, hence why we end up caching result
+// sets when we see them and just playing back the last one. Adding support for MySQL's multiple result set behavior
+// and better matching MySQL on which statements are allowed to return result sets from a stored procedure seems
+// like it could potentially allow us to get rid of that caching.
+//
+// A control flow error whose label matches this loop (compared case-insensitively) either exits the loop or starts
+// its next iteration; any other error from the loop body is returned. Every loop body iterator that gets drained
+// here is closed afterwards, even when draining fails.
 func (b *BaseBuilder) buildLoop(ctx *sql.Context, n *plan.Loop, row sql.Row) (sql.RowIter, error) {
-	var blockIter sql.RowIter
-	// Currently, acquiring the RowIter will actually run through the loop once, so we abuse this by grabbing the iter
-	// only if we're supposed to run through the iter once before evaluating the condition
+	// Acquiring the RowIter will actually execute the loop body once (because of how we cache/scan for the right
+	// SELECT result set to return), so we grab the iter ONLY if we're supposed to run through the loop body once
+	// before evaluating the condition
+	var loopBodyIter sql.RowIter
 	if n.OnceBeforeEval {
 		var err error
-		blockIter, err = b.loopAcquireRowIter(ctx, row, n.Label, n.Block, true)
+		loopBodyIter, err = b.loopAcquireRowIter(ctx, row, n.Label, n.Block, true)
 		if err != nil {
 			return nil, err
 		}
 	}
-	iter := &loopIter{
-		block:         n.Block,
-		label:         strings.ToLower(n.Label),
-		condition:     n.Condition,
-		once:          sync.Once{},
-		blockIter:     blockIter,
-		row:           row,
-		loopIteration: 0,
+
+	var returnRows []sql.Row
+	var returnNode sql.Node
+	var returnSch sql.Schema
+	selectSeen := false
+
+	// It's technically valid to make an infinite loop, but we don't want to actually allow that. The counter is a
+	// uint64 because the limit does not fit in a 32-bit int.
+	const maxIterationCount = 10_000_000_000
+
+	for loopIteration := uint64(0); ; loopIteration++ {
+		if loopIteration >= maxIterationCount {
+			return nil, fmt.Errorf("infinite LOOP detected")
+		}
+
+		// If the condition is false, then we stop evaluation
+		condition, err := n.Condition.Eval(ctx, nil)
+		if err != nil {
+			return nil, err
+		}
+		conditionBool, err := types.ConvertToBool(condition)
+		if err != nil {
+			return nil, err
+		}
+		if !conditionBool {
+			// loopBodyIter should only be set if this is the first time through the loop and the loop has a
+			// OnceBeforeEval condition. This ensures we return a result set, without us having to drain the iterator,
+			// recache rows, and return a new iterator.
+			if loopBodyIter != nil {
+				return loopBodyIter, nil
+			}
+			break
+		}
+
+		if loopBodyIter == nil {
+			loopBodyIter, err = b.loopAcquireRowIter(ctx, nil, strings.ToLower(n.Label), n.Block, false)
+			if err == io.EOF {
+				break
+			} else if err != nil {
+				return nil, err
+			}
+		}
+
+		// Decide whether this pass's rows become the result set we return. A SELECT always does; anything else
+		// only counts until the first SELECT result set has been seen.
+		includeResultSet := false
+		if blockRowIter, ok := loopBodyIter.(plan.BlockRowIter); ok {
+			subIterNode := blockRowIter.RepresentingNode()
+			isSelect := plan.NodeRepresentsSelect(subIterNode)
+			if isSelect {
+				selectSeen = true
+			}
+			if isSelect || !selectSeen {
+				includeResultSet = true
+				returnNode = subIterNode
+				returnSch = blockRowIter.Schema()
+			}
+		}
+
+		// Wrap the caching code in an inline function so that we can use defer to safely dispose of the cache
+		err = func() error {
+			rowCache, disposeFunc := ctx.Memory.NewRowsCache()
+			defer disposeFunc()
+
+			var bodyErr error
+			for {
+				var nextRow sql.Row
+				nextRow, bodyErr = loopBodyIter.Next(ctx)
+				if bodyErr != nil {
+					break
+				}
+				if bodyErr = rowCache.Add(nextRow); bodyErr != nil {
+					break
+				}
+			}
+
+			// This pass over the body is finished whether or not it failed, so always close the iterator and
+			// clear it; otherwise the next loop iteration would reuse (or return) a stale iterator.
+			closeErr := loopBodyIter.Close(ctx)
+			loopBodyIter = nil
+
+			_, isControlFlow := bodyErr.(loopError)
+			switch {
+			case bodyErr != io.EOF && !isControlFlow:
+				// A real error from the loop body takes precedence over a failure to close
+				return bodyErr
+			case closeErr != nil:
+				return closeErr
+			case isControlFlow:
+				return bodyErr
+			}
+
+			if includeResultSet {
+				returnRows = rowCache.Get()
+			}
+			return nil
+		}()
+
+		// An EOF needs no handling; we just execute the next loop iteration
+		if err != nil && err != io.EOF {
+			controlFlow, ok := err.(loopError)
+			if !ok || !strings.EqualFold(controlFlow.Label, n.Label) {
+				// If the error wasn't a control flow error signaling this loop to start its next iteration or
+				// to exit, then it must be a real error, so just return it.
+				return nil, err
+			}
+			if controlFlow.IsExit {
+				break
+			}
+		}
 	}
-	return iter, nil
+
+	return &blockIter{
+		internalIter: sql.RowsToRowIter(returnRows...),
+		repNode:      returnNode,
+		sch:          returnSch,
+	}, nil
 }
 
 func (b *BaseBuilder) buildElseCaseError(ctx *sql.Context, n plan.ElseCaseError, row sql.Row) (sql.RowIter, error) {
diff --git a/sql/rowexec/proc_iters.go b/sql/rowexec/proc_iters.go
@@ -18,14 +18,12 @@ import (
 	"fmt"
 	"io"
 	"strings"
-	"sync"
 
 	"github.com/dolthub/vitess/go/mysql"
 
 	"github.com/dolthub/go-mysql-server/sql"
 	"github.com/dolthub/go-mysql-server/sql/expression"
 	"github.com/dolthub/go-mysql-server/sql/plan"
-	"github.com/dolthub/go-mysql-server/sql/types"
 )
 
 // ifElseIter is the row iterator for *IfElseBlock.
@@ -245,90 +243,6 @@ func (c *closeIter) Close(ctx *sql.Context) error {
 	return nil
 }
 
-// loopIter is the sql.RowIter of *Loop.
-type loopIter struct {
-	block         *plan.Block
-	label         string
-	condition     sql.Expression
-	once          sync.Once
-	blockIter     sql.RowIter
-	row           sql.Row
-	loopIteration uint64
-}
-
-var _ sql.RowIter = (*loopIter)(nil)
-
-// Next implements the interface sql.RowIter.
-func (l *loopIter) Next(ctx *sql.Context) (sql.Row, error) {
-	// It's technically valid to make an infinite loop, but we don't want to actually allow that
-	const maxIterationCount = 10_000_000_000
-	l.loopIteration++
-	for ; l.loopIteration < maxIterationCount; l.loopIteration++ {
-		// If the condition is false, then we stop evaluation
-		condition, err := l.condition.Eval(ctx, nil)
-		if err != nil {
-			return nil, err
-		}
-		conditionBool, err := types.ConvertToBool(condition)
-		if err != nil {
-			return nil, err
-		}
-		if !conditionBool {
-			return nil, io.EOF
-		}
-
-		if l.blockIter == nil {
-			var err error
-			b := &BaseBuilder{}
-			l.blockIter, err = b.loopAcquireRowIter(ctx, nil, l.label, l.block, false)
-			if err != nil {
-				return nil, err
-			}
-		}
-
-		if err := startTransaction(ctx); err != nil {
-			return nil, err
-		}
-
-		nextRow, err := l.blockIter.Next(ctx)
-		if err != nil {
-			restart := false
-			if err == io.EOF {
-				restart = true
-			} else if controlFlow, ok := err.(loopError); ok && strings.ToLower(controlFlow.Label) == l.label {
-				if controlFlow.IsExit {
-					return nil, io.EOF
-				} else {
-					restart = true
-				}
-			}
-
-			if restart {
-				err = l.blockIter.Close(ctx)
-				if err != nil {
-					return nil, err
-				}
-				l.blockIter = nil
-				continue
-			}
-			return nil, err
-		}
-		return nextRow, nil
-	}
-	if l.loopIteration >= maxIterationCount {
-		return nil, fmt.Errorf("infinite LOOP detected")
-	}
-	return nil, io.EOF
-}
-
-// Close implements the interface sql.RowIter.
-func (l *loopIter) Close(ctx *sql.Context) error {
-	if l.blockIter != nil {
-		return l.blockIter.Close(ctx)
-	}
-	return nil
-}
-
 // loopError is an error used to control a loop's flow.
 type loopError struct {
 	Label  string
diff --git a/sql/rowexec/rel.go b/sql/rowexec/rel.go
@@ -281,7 +281,7 @@ func (b *BaseBuilder) buildOrderedDistinct(ctx *sql.Context, n *plan.OrderedDist
 }
 
 func (b *BaseBuilder) buildWith(ctx *sql.Context, n *plan.With, row sql.Row) (sql.RowIter, error) {
-	return nil, fmt.Errorf("*plan.With has not execution iterator")
+	return nil, fmt.Errorf("*plan.With has no execution iterator")
 }
 
 func (b *BaseBuilder) buildProject(ctx *sql.Context, n *plan.Project, row sql.Row) (sql.RowIter, error) {

Original file line number	Diff line number	Diff line change
`@@ -281,7 +281,7 @@ func (b *BaseBuilder) buildOrderedDistinct(ctx *sql.Context, n *plan.OrderedDist`
`281`	`281`	`}`
`282`	`282`
`283`	`283`	`func (b *BaseBuilder) buildWith(ctx *sql.Context, n *plan.With, row sql.Row) (sql.RowIter, error) {`
`284`		`- return nil, fmt.Errorf("*plan.With has not execution iterator")`
	`284`	`+ return nil, fmt.Errorf("*plan.With has no execution iterator")`
`285`	`285`	`}`
`286`	`286`
`287`	`287`	`func (b *BaseBuilder) buildProject(ctx *sql.Context, n *plan.Project, row sql.Row) (sql.RowIter, error) {`