Skip to content

Commit 52800a2

Browse files
authored
feat: add support for regexFind and regexFindAll (#2474)
1 parent 3944989 commit 52800a2

File tree

4 files changed

+576
-1
lines changed

4 files changed

+576
-1
lines changed

api-report/firestore.api.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,10 @@ abstract class Expression implements firestore.Pipelines.Expression, HasUserData
10461046
_protoValueType: "ProtoValue";
10471047
regexContains(pattern: string): BooleanExpression;
10481048
regexContains(pattern: Expression): BooleanExpression;
1049+
regexFind(pattern: string): FunctionExpression;
1050+
regexFind(pattern: Expression): FunctionExpression;
1051+
regexFindAll(pattern: string): FunctionExpression;
1052+
regexFindAll(pattern: Expression): FunctionExpression;
10491053
regexMatch(pattern: string): BooleanExpression;
10501054
regexMatch(pattern: Expression): BooleanExpression;
10511055
reverse(): FunctionExpression;
@@ -2844,7 +2848,7 @@ function xor(first: BooleanExpression, second: BooleanExpression, ...additionalC
28442848
// build/types/src/reference/vector-query.d.ts:61:8 - (tsdoc-undefined-tag) The TSDoc tag "@private" is not defined in this configuration
28452849
// build/types/src/serializer.d.ts:30:4 - (tsdoc-undefined-tag) The TSDoc tag "@private" is not defined in this configuration
28462850
// build/types/src/serializer.d.ts:40:4 - (tsdoc-undefined-tag) The TSDoc tag "@private" is not defined in this configuration
2847-
// build/types/src/telemetry/trace-util.d.ts:66:4 - (tsdoc-undefined-tag) The TSDoc tag "@private" is not defined in this configuration
2851+
// build/types/src/telemetry/trace-util.d.ts:67:4 - (tsdoc-undefined-tag) The TSDoc tag "@private" is not defined in this configuration
28482852
// build/types/src/write-batch.d.ts:85:8 - (tsdoc-undefined-tag) The TSDoc tag "@private" is not defined in this configuration
28492853
// build/types/src/write-batch.d.ts:108:8 - (tsdoc-undefined-tag) The TSDoc tag "@private" is not defined in this configuration
28502854

dev/src/pipelines/expression.ts

Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,92 @@ export abstract class Expression
825825
]).asBoolean();
826826
}
827827

828+
/**
829+
* @beta
830+
* Creates an expression that returns the first substring of a string expression that matches
831+
* a specified regular expression.
832+
*
833+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
834+
*
835+
* @example
836+
* ```typescript
837+
* // Extract the domain from an email address
838+
* field("email").regexFind("@.+")
839+
* ```
840+
*
841+
* @param pattern - The regular expression to search for.
842+
* @returns A new `Expression` that evaluates to the first matched substring.
843+
*/
844+
regexFind(pattern: string): FunctionExpression;
845+
846+
/**
847+
* @beta
848+
* Creates an expression that returns the first substring of a string expression that matches
849+
* a specified regular expression.
850+
*
851+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
852+
*
853+
* @example
854+
* ```typescript
855+
* // Find the first substring of 'email' that matches the pattern stored in the 'domain' field
856+
* field("email").regexFind(field("domain"))
857+
* ```
858+
*
859+
* @param pattern - The regular expression to search for.
860+
* @returns A new `Expression` that evaluates to the first matched substring.
861+
*/
862+
regexFind(pattern: Expression): FunctionExpression;
863+
regexFind(stringOrExpr: string | Expression): FunctionExpression {
864+
return new FunctionExpression('regex_find', [
865+
this,
866+
valueToDefaultExpr(stringOrExpr),
867+
]);
868+
}
869+
870+
/**
871+
* @beta
872+
*
873+
* Creates an expression that evaluates to a list of all substrings in this string expression that
874+
* match a specified regular expression.
875+
*
876+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
877+
*
878+
* @example
879+
* ```typescript
880+
* // Extract all hashtags from a post content field
881+
* field("content").regexFindAll("#[A-Za-z0-9_]+")
882+
* ```
883+
*
884+
* @param pattern - The regular expression to search for.
885+
* @returns A new `Expression` that evaluates to an array of matched substrings.
886+
*/
887+
regexFindAll(pattern: string): FunctionExpression;
888+
889+
/**
890+
* @beta
891+
*
892+
* Creates an expression that evaluates to a list of all substrings in this string expression that
893+
* match a specified regular expression.
894+
*
895+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
896+
*
897+
* @example
898+
* ```typescript
899+
* // Extract all substrings of 'content' that match the pattern stored in the 'names' field
900+
* field("content").regexFindAll(field("names"))
901+
* ```
902+
*
903+
* @param pattern - The regular expression to search for.
904+
* @returns A new `Expression` that evaluates to an array of matched substrings.
905+
*/
906+
regexFindAll(pattern: Expression): FunctionExpression;
907+
regexFindAll(stringOrExpr: string | Expression): FunctionExpression {
908+
return new FunctionExpression('regex_find_all', [
909+
this,
910+
valueToDefaultExpr(stringOrExpr),
911+
]);
912+
}
913+
828914
/**
829915
* @beta
830916
* Creates an expression that checks if a string matches a specified regular expression.
@@ -5538,6 +5624,206 @@ export function regexContains(
55385624
return leftExpr.regexContains(patternExpr);
55395625
}
55405626

5627+
/**
5628+
* @beta
5629+
*
5630+
* Creates an expression that returns the first substring of a string field that matches a
5631+
* specified regular expression.
5632+
*
5633+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
5634+
*
5635+
* @example
5636+
* ```typescript
5637+
* // Extract the domain name from an email field
5638+
* regexFind("email", "@[A-Za-z0-9.-]+");
5639+
* ```
5640+
*
5641+
* @param fieldName - The name of the field containing the string to search.
5642+
* @param pattern - The regular expression to search for.
5643+
* @returns A new `Expression` that evaluates to the first matched substring.
5644+
*/
5645+
export function regexFind(
5646+
fieldName: string,
5647+
pattern: string,
5648+
): FunctionExpression;
5649+
5650+
/**
5651+
* @beta
5652+
*
5653+
* Creates an expression that returns the first substring of a string field that matches a
5654+
* specified regular expression.
5655+
*
5656+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
5657+
*
5658+
* @example
5659+
* ```typescript
5660+
* // Extract a substring from 'email' based on a pattern stored in another field
5661+
* regexFind("email", field("pattern"));
5662+
* ```
5663+
*
5664+
* @param fieldName - The name of the field containing the string to search.
5665+
* @param pattern - The regular expression to search for.
5666+
* @returns A new `Expression` that evaluates to the first matched substring.
5667+
*/
5668+
export function regexFind(
5669+
fieldName: string,
5670+
pattern: Expression,
5671+
): FunctionExpression;
5672+
5673+
/**
5674+
* @beta
5675+
*
5676+
* Creates an expression that returns the first substring of a string expression that matches
5677+
* a specified regular expression.
5678+
*
5679+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
5680+
*
5681+
* @example
5682+
* ```typescript
5683+
* // Extract the domain from an email address expression
5684+
* regexFind(field("email"), "@[A-Za-z0-9.-]+");
5685+
* ```
5686+
*
5687+
* @param stringExpression - The expression representing the string to search.
5688+
* @param pattern - The regular expression to search for.
5689+
* @returns A new `Expression` that evaluates to the first matched substring.
5690+
*/
5691+
export function regexFind(
5692+
stringExpression: Expression,
5693+
pattern: string,
5694+
): FunctionExpression;
5695+
5696+
/**
5697+
* @beta
5698+
*
5699+
* Creates an expression that returns the first substring of a string expression that matches
5700+
* a specified regular expression.
5701+
*
5702+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
5703+
*
5704+
* @example
5705+
* ```typescript
5706+
* // Extract a substring based on a dynamic pattern field
5707+
* regexFind(field("email"), field("pattern"));
5708+
* ```
5709+
*
5710+
* @param stringExpression - The expression representing the string to search.
5711+
* @param pattern - The regular expression to search for.
5712+
* @returns A new `Expression` that evaluates to the first matched substring.
5713+
*/
5714+
export function regexFind(
5715+
stringExpression: Expression,
5716+
pattern: Expression,
5717+
): FunctionExpression;
5718+
export function regexFind(
5719+
left: Expression | string,
5720+
pattern: Expression | string,
5721+
): FunctionExpression {
5722+
const leftExpr = fieldOrExpression(left);
5723+
const patternExpr = valueToDefaultExpr(pattern);
5724+
return leftExpr.regexFind(patternExpr);
5725+
}
5726+
5727+
/**
5728+
* @beta
5729+
*
5730+
* Creates an expression that evaluates to a list of all substrings in a string field that
5731+
* match a specified regular expression.
5732+
*
5733+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
5734+
*
5735+
* @example
5736+
* ```typescript
5737+
* // Extract all hashtags from a post content field
5738+
* regexFindAll("content", "#[A-Za-z0-9_]+");
5739+
* ```
5740+
*
5741+
* @param fieldName - The name of the field containing the string to search.
5742+
* @param pattern - The regular expression to search for.
5743+
* @returns A new `Expression` that evaluates to an array of matched substrings.
5744+
*/
5745+
export function regexFindAll(
5746+
fieldName: string,
5747+
pattern: string,
5748+
): FunctionExpression;
5749+
5750+
/**
5751+
* @beta
5752+
*
5753+
* Creates an expression that evaluates to a list of all substrings in a string field that
5754+
* match a specified regular expression.
5755+
*
5756+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
5757+
*
5758+
* @example
5759+
* ```typescript
5760+
* // Extract all matches from 'content' based on a pattern stored in another field
5761+
* regexFindAll("content", field("pattern"));
5762+
* ```
5763+
*
5764+
* @param fieldName - The name of the field containing the string to search.
5765+
* @param pattern - The regular expression to search for.
5766+
* @returns A new `Expression` that evaluates to an array of matched substrings.
5767+
*/
5768+
export function regexFindAll(
5769+
fieldName: string,
5770+
pattern: Expression,
5771+
): FunctionExpression;
5772+
5773+
/**
5774+
* @beta
5775+
*
5776+
* Creates an expression that evaluates to a list of all substrings in a string expression
5777+
* that match a specified regular expression.
5778+
*
5779+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
5780+
*
5781+
* @example
5782+
* ```typescript
5783+
* // Extract all mentions from a comment expression
5784+
* regexFindAll(field("comment"), "@[A-Za-z0-9_]+");
5785+
* ```
5786+
*
5787+
* @param stringExpression - The expression representing the string to search.
5788+
* @param pattern - The regular expression to search for.
5789+
* @returns A new `Expression` that evaluates to an array of matched substrings.
5790+
*/
5791+
export function regexFindAll(
5792+
stringExpression: Expression,
5793+
pattern: string,
5794+
): FunctionExpression;
5795+
5796+
/**
5797+
* @beta
5798+
*
5799+
* Creates an expression that evaluates to a list of all substrings in a string expression
5800+
* that match a specified regular expression.
5801+
*
5802+
* This expression uses the {@link https://github.com/google/re2/wiki/Syntax | RE2} regular expression syntax.
5803+
*
5804+
* @example
5805+
* ```typescript
5806+
* // Extract all matches based on a dynamic pattern expression
5807+
* regexFindAll(field("comment"), field("pattern"));
5808+
* ```
5809+
*
5810+
* @param stringExpression - The expression representing the string to search.
5811+
* @param pattern - The regular expression to search for.
5812+
* @returns A new `Expression` that evaluates to an array of matched substrings.
5813+
*/
5814+
export function regexFindAll(
5815+
stringExpression: Expression,
5816+
pattern: Expression,
5817+
): FunctionExpression;
5818+
export function regexFindAll(
5819+
left: Expression | string,
5820+
pattern: Expression | string,
5821+
): FunctionExpression {
5822+
const leftExpr = fieldOrExpression(left);
5823+
const patternExpr = valueToDefaultExpr(pattern);
5824+
return leftExpr.regexFindAll(patternExpr);
5825+
}
5826+
55415827
/**
55425828
* @beta
55435829
* Creates an expression that checks if a string field matches a specified regular expression.

dev/system-test/pipeline.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ import {getTestDb, getTestRoot} from './firestore';
141141

142142
import {Firestore as InternalFirestore} from '../src';
143143
import {ServiceError} from 'google-gax';
144+
import {regexFind, regexFindAll} from '../src/pipelines/expression';
144145

145146
use(chaiAsPromised);
146147

@@ -2617,6 +2618,40 @@ describe.skipClassic('Pipeline class', () => {
26172618
expect(snapshot.results.length).to.equal(5);
26182619
});
26192620

2621+
it('testRegexFind', async () => {
2622+
const snapshot = await firestore
2623+
.pipeline()
2624+
.collection(randomCol.path)
2625+
.select(regexFind('title', '^\\w+').as('firstWordInTitle'))
2626+
.select('firstWordInTitle')
2627+
.sort(field('firstWordInTitle').ascending())
2628+
.limit(3)
2629+
.execute();
2630+
expectResults(
2631+
snapshot,
2632+
{firstWordInTitle: '1984'},
2633+
{firstWordInTitle: 'Crime'},
2634+
{firstWordInTitle: 'Dune'},
2635+
);
2636+
});
2637+
2638+
it('testRegexFindAll', async () => {
2639+
const snapshot = await firestore
2640+
.pipeline()
2641+
.collection(randomCol.path)
2642+
.select(regexFindAll('title', '\\w+').as('wordsInTitle'))
2643+
.select('wordsInTitle')
2644+
.sort(field('wordsInTitle').ascending())
2645+
.limit(3)
2646+
.execute();
2647+
expectResults(
2648+
snapshot,
2649+
{wordsInTitle: ['1984']},
2650+
{wordsInTitle: ['Crime', 'and', 'Punishment']},
2651+
{wordsInTitle: ['Dune']},
2652+
);
2653+
});
2654+
26202655
it('testRegexMatches', async () => {
26212656
const snapshot = await firestore
26222657
.pipeline()

0 commit comments

Comments
 (0)