Skip to content

Commit d2f6e17

Browse files
authored
Refactoring: extract text transform library from libs/tags to libs/textutil (#3621)
## Why

- The text transformation logic can be useful outside the domain of working with tags.
- I am planning to reuse it to populate a DNS-friendly short user name in a follow-up PR: #3623

## Tests

Existing tests.
1 parent 6d2367f commit d2f6e17

File tree

11 files changed

+174
-159
lines changed

11 files changed

+174
-159
lines changed

libs/tags/aws.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"regexp"
55
"unicode"
66

7+
"github.com/databricks/cli/libs/textutil"
8+
79
"golang.org/x/text/unicode/rangetable"
810
)
911

@@ -20,17 +22,17 @@ var awsChars = rangetable.Merge(
2022
var awsTag = &tag{
2123
keyLength: 127,
2224
keyPattern: regexp.MustCompile(`^[\d \w\+\-=\.:\/@]*$`),
23-
keyNormalize: chain(
24-
normalizeMarks(),
25-
replaceNotIn(latin1, '_'),
26-
replaceNotIn(awsChars, '_'),
25+
keyNormalize: textutil.Chain(
26+
textutil.NormalizeMarks(),
27+
textutil.ReplaceNotIn(textutil.Latin1, '_'),
28+
textutil.ReplaceNotIn(awsChars, '_'),
2729
),
2830

2931
valueLength: 255,
3032
valuePattern: regexp.MustCompile(`^[\d \w\+\-=\.:/@]*$`),
31-
valueNormalize: chain(
32-
normalizeMarks(),
33-
replaceNotIn(latin1, '_'),
34-
replaceNotIn(awsChars, '_'),
33+
valueNormalize: textutil.Chain(
34+
textutil.NormalizeMarks(),
35+
textutil.ReplaceNotIn(textutil.Latin1, '_'),
36+
textutil.ReplaceNotIn(awsChars, '_'),
3537
),
3638
}

libs/tags/azure.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package tags
33
import (
44
"regexp"
55

6+
"github.com/databricks/cli/libs/textutil"
7+
68
"golang.org/x/text/unicode/rangetable"
79
)
810

@@ -12,14 +14,14 @@ var azureForbiddenChars = rangetable.New('<', '>', '*', '&', '%', ';', '\\', '/'
1214
var azureTag = &tag{
1315
keyLength: 512,
1416
keyPattern: regexp.MustCompile(`^[^<>\*&%;\\\/\+\?]*$`),
15-
keyNormalize: chain(
16-
replaceNotIn(latin1, '_'),
17-
replaceIn(azureForbiddenChars, '_'),
17+
keyNormalize: textutil.Chain(
18+
textutil.ReplaceNotIn(textutil.Latin1, '_'),
19+
textutil.ReplaceIn(azureForbiddenChars, '_'),
1820
),
1921

2022
valueLength: 256,
2123
valuePattern: regexp.MustCompile(`^.*$`),
22-
valueNormalize: chain(
23-
replaceNotIn(latin1, '_'),
24+
valueNormalize: textutil.Chain(
25+
textutil.ReplaceNotIn(textutil.Latin1, '_'),
2426
),
2527
}

libs/tags/gcp.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package tags
33
import (
44
"regexp"
55
"unicode"
6+
7+
"github.com/databricks/cli/libs/textutil"
68
)
79

810
// Tag keys and values on GCP are limited to 63 characters and must match the
@@ -45,19 +47,19 @@ var gcpInner = &unicode.RangeTable{
4547
var gcpTag = &tag{
4648
keyLength: 63,
4749
keyPattern: regexp.MustCompile(`^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$`),
48-
keyNormalize: chain(
49-
normalizeMarks(),
50-
replaceNotIn(latin1, '_'),
51-
replaceNotIn(gcpInner, '_'),
52-
trimIfNotIn(gcpOuter),
50+
keyNormalize: textutil.Chain(
51+
textutil.NormalizeMarks(),
52+
textutil.ReplaceNotIn(textutil.Latin1, '_'),
53+
textutil.ReplaceNotIn(gcpInner, '_'),
54+
textutil.TrimIfNotIn(gcpOuter),
5355
),
5456

5557
valueLength: 63,
5658
valuePattern: regexp.MustCompile(`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`),
57-
valueNormalize: chain(
58-
normalizeMarks(),
59-
replaceNotIn(latin1, '_'),
60-
replaceNotIn(gcpInner, '_'),
61-
trimIfNotIn(gcpOuter),
59+
valueNormalize: textutil.Chain(
60+
textutil.NormalizeMarks(),
61+
textutil.ReplaceNotIn(textutil.Latin1, '_'),
62+
textutil.ReplaceNotIn(gcpInner, '_'),
63+
textutil.TrimIfNotIn(gcpOuter),
6264
),
6365
}

libs/tags/latin_test.go

Lines changed: 0 additions & 16 deletions
This file was deleted.

libs/tags/tag.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@ import (
66
"regexp"
77
"strings"
88
"unicode"
9+
10+
"github.com/databricks/cli/libs/textutil"
911
)
1012

1113
// The tag type holds the validation and normalization rules for
1214
// a cloud provider's resource tags as applied by Databricks.
1315
type tag struct {
1416
keyLength int
1517
keyPattern *regexp.Regexp
16-
keyNormalize transformer
18+
keyNormalize textutil.Transformer
1719

1820
valueLength int
1921
valuePattern *regexp.Regexp
20-
valueNormalize transformer
22+
valueNormalize textutil.Transformer
2123
}
2224

2325
func (t *tag) ValidateKey(s string) error {
@@ -27,7 +29,7 @@ func (t *tag) ValidateKey(s string) error {
2729
if len(s) > t.keyLength {
2830
return fmt.Errorf("key length %d exceeds maximum of %d", len(s), t.keyLength)
2931
}
30-
if strings.ContainsFunc(s, func(r rune) bool { return !unicode.Is(latin1, r) }) {
32+
if strings.ContainsFunc(s, func(r rune) bool { return !unicode.Is(textutil.Latin1, r) }) {
3133
return errors.New("key contains non-latin1 characters")
3234
}
3335
if !t.keyPattern.MatchString(s) {
@@ -40,7 +42,7 @@ func (t *tag) ValidateValue(s string) error {
4042
if len(s) > t.valueLength {
4143
return fmt.Errorf("value length %d exceeds maximum of %d", len(s), t.valueLength)
4244
}
43-
if strings.ContainsFunc(s, func(r rune) bool { return !unicode.Is(latin1, r) }) {
45+
if strings.ContainsFunc(s, func(r rune) bool { return !unicode.Is(textutil.Latin1, r) }) {
4446
return errors.New("value contains non-latin1 characters")
4547
}
4648
if !t.valuePattern.MatchString(s) {
@@ -50,9 +52,9 @@ func (t *tag) ValidateValue(s string) error {
5052
}
5153

5254
func (t *tag) NormalizeKey(s string) string {
53-
return t.keyNormalize.transform(s)
55+
return t.keyNormalize.TransformString(s)
5456
}
5557

5658
func (t *tag) NormalizeValue(s string) string {
57-
return t.valueNormalize.transform(s)
59+
return t.valueNormalize.TransformString(s)
5860
}

libs/tags/transform.go

Lines changed: 0 additions & 87 deletions
This file was deleted.

libs/tags/transform_test.go

Lines changed: 0 additions & 25 deletions
This file was deleted.

libs/tags/latin.go → libs/textutil/latin.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
package tags
1+
package textutil
22

33
import "unicode"
44

55
// Range table for all characters in the Latin1 character set.
6-
var latin1 = &unicode.RangeTable{
6+
var Latin1 = &unicode.RangeTable{
77
R16: []unicode.Range16{
88
{0x0000, 0x00ff, 1},
99
},

libs/textutil/latin_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package textutil
2+
3+
import (
4+
"testing"
5+
"unicode"
6+
7+
"github.com/stretchr/testify/assert"
8+
)
9+
10+
func TestLatinTable(t *testing.T) {
11+
assert.True(t, unicode.In('\u0000', Latin1))
12+
assert.True(t, unicode.In('A', Latin1))
13+
assert.True(t, unicode.In('Z', Latin1))
14+
assert.True(t, unicode.In('\u00ff', Latin1))
15+
assert.False(t, unicode.In('\u0100', Latin1))
16+
}

0 commit comments

Comments
 (0)