Skip to content

Commit 011b5b2

Browse files
authored
Merge pull request #829 from dolthub/fulghum/text
Support for using `TEXT` fields in secondary indexes and `UNIQUE` constraints
2 parents f86e8e0 + c413409 commit 011b5b2

File tree

4 files changed

+224
-20
lines changed

4 files changed

+224
-20
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// Copyright 2024 Dolthub, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package analyzer
16+
17+
import (
18+
"fmt"
19+
"strings"
20+
21+
"github.com/dolthub/go-mysql-server/sql"
22+
"github.com/dolthub/go-mysql-server/sql/analyzer"
23+
"github.com/dolthub/go-mysql-server/sql/plan"
24+
"github.com/dolthub/go-mysql-server/sql/transform"
25+
26+
pgtypes "github.com/dolthub/doltgresql/server/types"
27+
)
28+
29+
// defaultIndexPrefixLength is the index prefix length that this analyzer rule applies automatically to TEXT columns
30+
// in secondary indexes. 768 is the limit for the prefix length in MySQL and is also enforced in Dolt/GMS, so this
31+
// is currently the largest size we can support.
32+
// The value is assigned to sql.IndexColumn.Length, and only for indexed TEXT columns whose Length is 0 (i.e. no
// explicit prefix length was given).
// NOTE(review): presumably this length is a character count rather than a byte count — confirm against GMS.
const defaultIndexPrefixLength = 768
33+
34+
// AddImplicitPrefixLengths searches the |node| tree for any nodes creating an index, and plugs in a default index
35+
// prefix length for any TEXT columns in those new indexes. This rule is intended to be used for Postgres compatibility,
36+
// since Postgres does not require specifying prefix lengths for TEXT columns.
37+
func AddImplicitPrefixLengths(_ *sql.Context, _ *analyzer.Analyzer, node sql.Node, _ *plan.Scope, _ analyzer.RuleSelector, _ *sql.QueryFlags) (sql.Node, transform.TreeIdentity, error) {
38+
var targetSchema sql.Schema
39+
transform.Inspect(node, func(node sql.Node) bool {
40+
if st, ok := node.(sql.SchemaTarget); ok {
41+
targetSchema = st.TargetSchema().Copy()
42+
return false
43+
}
44+
return true
45+
})
46+
47+
// Recurse through the node tree to fill in prefix lengths. Note that some statements come in as Block nodes
48+
// that contain multiple nodes, so we need to recurse through and handle all of them.
49+
return transform.Node(node, func(node sql.Node) (sql.Node, transform.TreeIdentity, error) {
50+
switch node := node.(type) {
51+
case *plan.AddColumn:
52+
// For any AddColumn nodes, we need to update the target schema with the column being added, otherwise
53+
// we won't be able to find those columns if they are also being added to a secondary index.
54+
var err error
55+
targetSchema, err = analyzer.ValidateAddColumn(targetSchema, node)
56+
if err != nil {
57+
return nil, transform.SameTree, err
58+
}
59+
60+
case *plan.CreateTable:
61+
newIndexes := make([]*sql.IndexDef, len(node.Indexes()))
62+
for i := range node.Indexes() {
63+
copy := *node.Indexes()[i]
64+
newIndexes[i] = &copy
65+
}
66+
indexModified := false
67+
for _, index := range newIndexes {
68+
targetSchema := node.TargetSchema()
69+
colMap := schToColMap(targetSchema)
70+
for i := range index.Columns {
71+
col, ok := colMap[strings.ToLower(index.Columns[i].Name)]
72+
if !ok {
73+
return nil, false, fmt.Errorf("indexed column %s not found in schema", index.Columns[i].Name)
74+
}
75+
if _, ok := col.Type.(pgtypes.TextType); ok && index.Columns[i].Length == 0 {
76+
index.Columns[i].Length = defaultIndexPrefixLength
77+
indexModified = true
78+
}
79+
}
80+
}
81+
if indexModified {
82+
newNode, err := node.WithIndexDefs(newIndexes)
83+
return newNode, transform.NewTree, err
84+
}
85+
86+
case *plan.AlterIndex:
87+
if node.Action == plan.IndexAction_Create {
88+
colMap := schToColMap(targetSchema)
89+
newColumns := make([]sql.IndexColumn, len(node.Columns))
90+
for i := range node.Columns {
91+
copy := node.Columns[i]
92+
newColumns[i] = copy
93+
}
94+
indexModified := false
95+
for i := range newColumns {
96+
col, ok := colMap[strings.ToLower(newColumns[i].Name)]
97+
if !ok {
98+
return nil, false, fmt.Errorf("indexed column %s not found in schema", newColumns[i].Name)
99+
}
100+
if _, ok := col.Type.(pgtypes.TextType); ok && newColumns[i].Length == 0 {
101+
newColumns[i].Length = defaultIndexPrefixLength
102+
indexModified = true
103+
}
104+
}
105+
if indexModified {
106+
newNode, err := node.WithColumns(newColumns)
107+
return newNode, transform.NewTree, err
108+
}
109+
}
110+
}
111+
return node, transform.SameTree, nil
112+
})
113+
}
114+
115+
func schToColMap(sch sql.Schema) map[string]*sql.Column {
116+
colMap := make(map[string]*sql.Column, len(sch))
117+
for _, col := range sch {
118+
colMap[strings.ToLower(col.Name)] = col
119+
}
120+
return colMap
121+
}

server/analyzer/init.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ const (
2626
ruleId_AssignUpdateCasts
2727
ruleId_ReplaceIndexedTables
2828
ruleId_ReplaceSerial
29+
ruleId_AddImplicitPrefixLengths
2930
ruleId_InsertContextRootFinalizer
3031
)
3132

@@ -41,11 +42,14 @@ func Init() {
4142
)
4243

4344
// Column default validation was moved to occur after type sanitization, so we'll remove it from its original place
44-
analyzer.OnceBeforeDefault = removeAnalyzerRules(analyzer.OnceBeforeDefault,
45-
analyzer.ValidateColumnDefaultsId)
45+
analyzer.OnceBeforeDefault = removeAnalyzerRules(analyzer.OnceBeforeDefault, analyzer.ValidateColumnDefaultsId)
46+
47+
// PostgreSQL doesn't have the concept of prefix lengths, so we add a rule to implicitly add them
48+
analyzer.OnceBeforeDefault = append([]analyzer.Rule{{Id: ruleId_AddImplicitPrefixLengths, Apply: AddImplicitPrefixLengths}},
49+
analyzer.OnceBeforeDefault...)
50+
4651
// Remove all other validation rules that do not apply to Postgres
47-
analyzer.DefaultValidationRules = removeAnalyzerRules(analyzer.DefaultValidationRules,
48-
analyzer.ValidateOperandsId)
52+
analyzer.DefaultValidationRules = removeAnalyzerRules(analyzer.DefaultValidationRules, analyzer.ValidateOperandsId)
4953

5054
analyzer.OnceAfterDefault = append(analyzer.OnceAfterDefault,
5155
analyzer.Rule{Id: ruleId_ReplaceSerial, Apply: ReplaceSerial},

testing/bats/dataloading/french-towns-communes-francaises.sql

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@ CREATE TABLE Regions (
1818
id SERIAL UNIQUE NOT NULL,
1919
code VARCHAR(4) UNIQUE NOT NULL,
2020
capital VARCHAR(10) NOT NULL, -- REFERENCES Towns (code),
21-
-- TODO: TEXT columns do not work correctly in Doltgres yet
22-
-- name TEXT UNIQUE NOT NULL
23-
name VARCHAR(255) UNIQUE NOT NULL
21+
name TEXT UNIQUE NOT NULL
2422
);
2523

2624
-- Departments / Départements
@@ -31,21 +29,15 @@ CREATE TABLE Departments (
3129
capital VARCHAR(10) UNIQUE NOT NULL, -- REFERENCES Towns (code),
3230
-- Actually, it is the concatenation of D.code + T.code.
3331
region VARCHAR(4) NOT NULL REFERENCES Regions (code),
34-
-- TODO: TEXT columns do not work correctly in Doltgres yet
35-
-- name TEXT UNIQUE NOT NULL
36-
name VARCHAR(255) UNIQUE NOT NULL
32+
name TEXT UNIQUE NOT NULL
3733
);
3834

3935
-- Towns / Communes
4036
CREATE TABLE Towns (
4137
id SERIAL UNIQUE NOT NULL,
4238
code VARCHAR(10) NOT NULL, -- Only unique inside a department
43-
-- TODO: TEXT columns do not work correctly in Doltgres yet
44-
-- article TEXT,
45-
article VARCHAR(255),
46-
-- TODO: TEXT columns do not work correctly in Doltgres yet
47-
-- name TEXT NOT NULL, -- Names are not really unique, for instance 'Sainte-Croix'
48-
name VARCHAR(255) NOT NULL, -- Names are not really unique, for instance 'Sainte-Croix'
39+
article TEXT,
40+
name TEXT NOT NULL, -- Names are not really unique, for instance 'Sainte-Croix'
4941
department VARCHAR(4) NOT NULL REFERENCES Departments (code),
5042
UNIQUE (code, department)
5143
-- UNIQUE (name, department) -- Not perfectly unique but almost

testing/go/types_test.go

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2111,11 +2111,27 @@ var typesTests = []ScriptTest{
21112111
{
21122112
Name: "Text type",
21132113
SetUpScript: []string{
2114+
// Test a table with a TEXT column
21142115
"CREATE TABLE t_text (id INTEGER primary key, v1 TEXT);",
21152116
"INSERT INTO t_text VALUES (1, 'Hello'), (2, 'World'), (3, ''), (4, NULL);",
2117+
2118+
// Test a table created with a TEXT column in a unique, secondary index
2119+
"CREATE TABLE t_text_unique (id INTEGER primary key, v1 TEXT, v2 TEXT NOT NULL UNIQUE);",
2120+
"INSERT INTO t_text_unique VALUES (1, 'Hello', 'Bonjour'), (2, 'World', 'tout le monde'), (3, '', ''), (4, NULL, '!');",
21162121
},
21172122
Assertions: []ScriptTestAssertion{
21182123
{
2124+
// Use the text keyword to cast
2125+
Query: `SELECT text 'text' || ' and unknown';`,
2126+
Expected: []sql.Row{{"text and unknown"}},
2127+
},
2128+
{
2129+
// Use the text keyword to cast
2130+
Query: `SELECT text 'this is a text string' = text 'this is a text string' AS true;`,
2131+
Expected: []sql.Row{{"t"}},
2132+
},
2133+
{
2134+
// Basic select from a table with a TEXT column
21192135
Query: "SELECT * FROM t_text ORDER BY id;",
21202136
Expected: []sql.Row{
21212137
{1, "Hello"},
@@ -2125,12 +2141,83 @@ var typesTests = []ScriptTest{
21252141
},
21262142
},
21272143
{
2128-
Query: `SELECT text 'text' || ' and unknown';`,
2129-
Expected: []sql.Row{{"text and unknown"}},
2144+
// Create a unique, secondary index on a TEXT column
2145+
Query: "CREATE UNIQUE INDEX v1_unique ON t_text(v1);",
2146+
Expected: []sql.Row{},
21302147
},
21312148
{
2132-
Query: `SELECT text 'this is a text string' = text 'this is a text string' AS true;`,
2133-
Expected: []sql.Row{{"t"}},
2149+
Query: "SELECT * FROM t_text WHERE v1 = 'World';",
2150+
Expected: []sql.Row{
2151+
{2, "World"},
2152+
},
2153+
},
2154+
{
2155+
// Test the new unique constraint on the TEXT column
2156+
Query: "INSERT INTO t_text VALUES (5, 'World');",
2157+
ExpectedErr: "unique",
2158+
},
2159+
{
2160+
Query: "SELECT * FROM t_text_unique WHERE v2 = '!';",
2161+
Expected: []sql.Row{
2162+
{4, nil, "!"},
2163+
},
2164+
},
2165+
{
2166+
Query: "SELECT * FROM t_text_unique WHERE v2 >= '!' ORDER BY v2;",
2167+
Expected: []sql.Row{
2168+
{4, nil, "!"},
2169+
{1, "Hello", "Bonjour"},
2170+
{2, "World", "tout le monde"},
2171+
},
2172+
},
2173+
{
2174+
// Test ordering by TEXT column in a secondary index
2175+
Query: "SELECT * FROM t_text_unique ORDER BY v2;",
2176+
Expected: []sql.Row{
2177+
{3, "", ""},
2178+
{4, nil, "!"},
2179+
{1, "Hello", "Bonjour"},
2180+
{2, "World", "tout le monde"},
2181+
},
2182+
},
2183+
{
2184+
Query: "SELECT * FROM t_text_unique ORDER BY id;",
2185+
Expected: []sql.Row{
2186+
{1, "Hello", "Bonjour"},
2187+
{2, "World", "tout le monde"},
2188+
{3, "", ""},
2189+
{4, nil, "!"},
2190+
},
2191+
},
2192+
{
2193+
Query: "INSERT INTO t_text_unique VALUES (5, 'Another', 'Bonjour');",
2194+
ExpectedErr: "unique",
2195+
},
2196+
{
2197+
// Create a secondary index over multiple text fields
2198+
Query: "CREATE INDEX on t_text_unique(v1, v2);",
2199+
Expected: []sql.Row{},
2200+
},
2201+
{
2202+
Query: "SELECT id FROM t_text_unique WHERE v1='Hello' and v2='Bonjour';",
2203+
Expected: []sql.Row{{1}},
2204+
},
2205+
{
2206+
// Create a table with a TEXT column to test adding a non-unique, secondary index
2207+
Query: `CREATE TABLE t2 (pk int primary key, c1 TEXT);`,
2208+
Expected: []sql.Row{},
2209+
},
2210+
{
2211+
Query: `CREATE INDEX idx1 ON t2(c1);`,
2212+
Expected: []sql.Row{},
2213+
},
2214+
{
2215+
Query: `INSERT INTO t2 VALUES (1, 'one'), (2, 'two');`,
2216+
Expected: []sql.Row{},
2217+
},
2218+
{
2219+
Query: `SELECT c1 from t2 order by c1;`,
2220+
Expected: []sql.Row{{"one"}, {"two"}},
21342221
},
21352222
},
21362223
},

0 commit comments

Comments
 (0)