Skip to content

Commit b1545e7

Browse files
committed
Implement partitions() table function
commit_hash:19bbcd26d46a4ca8d18f0dbae605b48ac823c614
1 parent 3523178 commit b1545e7

File tree

10 files changed

+662
-0
lines changed

10 files changed

+662
-0
lines changed

yql/essentials/minikql/mkql_type_ops.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2445,6 +2445,29 @@ bool IsValidStringValue(NUdf::EDataSlot type, NUdf::TStringRef buf) {
24452445
MKQL_ENSURE(false, "Incorrect data slot: " << (ui32)type);
24462446
}
24472447

2448+
TMaybe<TString> RegexMatchingValidStringValues(NUdf::EDataSlot type, ERegexFlavor flavor) {
2449+
if (flavor != ERegexFlavor::RE2) {
2450+
return {};
2451+
}
2452+
2453+
const auto& typeInfo = NUdf::GetDataTypeInfo(type);
2454+
if (typeInfo.Features & NUdf::EDataTypeFeatures::IntegralType) {
2455+
TStringBuilder result;
2456+
if (typeInfo.Features & NUdf::EDataTypeFeatures::SignedIntegralType) {
2457+
result << R"([-+]?)";
2458+
} else {
2459+
result << R"(\+?)";
2460+
}
2461+
result << R"(\d+)";
2462+
return result;
2463+
}
2464+
if (type == NUdf::EDataSlot::Date) {
2465+
return R"(\d{4}-\d{2}-\d{2})";
2466+
}
2467+
// TODO: more cases
2468+
return {};
2469+
}
2470+
24482471
NUdf::TUnboxedValuePod ValueFromString(NUdf::EDataSlot type, NUdf::TStringRef buf) {
24492472
switch (type) {
24502473
case NUdf::EDataSlot::Bool: {

yql/essentials/minikql/mkql_type_ops.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ ui32 GetMonthLength(ui32 month, bool isLeap);
2121

2222
bool IsValidStringValue(NUdf::EDataSlot type, NUdf::TStringRef buf);
2323

24+
enum class ERegexFlavor {
25+
RE2,
26+
};
27+
TMaybe<TString> RegexMatchingValidStringValues(NUdf::EDataSlot type, ERegexFlavor flavor);
28+
2429
NUdf::TUnboxedValuePod ValueFromString(NUdf::EDataSlot type, NUdf::TStringRef buf);
2530
NUdf::TUnboxedValuePod SimpleValueFromYson(NUdf::EDataSlot type, NUdf::TStringRef buf);
2631

yql/essentials/sql/v1/complete/name/service/static/name_service.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ class TFunctionNameService: public IRankingNameService {
189189
"WalkFolders",
190190
"EACH",
191191
"PARTITION_LIST",
192+
"PARTITIONS",
192193
}, NormalizeName))
193194
{
194195
}

yql/essentials/sql/v1/complete/sql_complete_ut.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,7 @@ Y_UNIT_TEST(SelectFrom) {
604604
{FunctionName, "FILTER()", 1},
605605
{FunctionName, "FOLDER()", 1},
606606
{FunctionName, "LIKE()", 1},
607+
{FunctionName, "PARTITIONS()", 1},
607608
{FunctionName, "PARTITION_LIST()", 1},
608609
{FunctionName, "RANGE()", 1},
609610
{FunctionName, "REGEXP()", 1},

yql/essentials/sql/v1/query.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,48 @@ class TPrepTableKeys: public ITableKeys {
849849
return nullptr;
850850
}
851851
return Y(func.EndsWith("strict") ? "MrPartitionListStrict" : "MrPartitionList", Y("EvaluateExpr", arg.Expr));
852+
} else if (func == "partitions" || func == "partitionsstrict") {
853+
auto requiredLangVer = MakeLangVersion(2025, 4);
854+
if (!IsBackwardCompatibleFeatureAvailable(ctx.Settings.LangVer, requiredLangVer, ctx.Settings.BackportMode)) {
855+
auto str = FormatLangVersion(requiredLangVer);
856+
YQL_ENSURE(str);
857+
ctx.Error(Pos_) << "PARTITIONS table function is not available before language version " << *str;
858+
return nullptr;
859+
}
860+
861+
if (ctx.DiscoveryMode) {
862+
ctx.Error(Pos_, TIssuesIds::YQL_NOT_ALLOWED_IN_DISCOVERY) << "PARTITIONS is not allowed in Discovery mode";
863+
return nullptr;
864+
}
865+
866+
if (Args_.size() != 2) {
867+
ctx.Error(Pos_) << "PARTITIONS requires 2 arguments, but got: " << Args_.size();
868+
return nullptr;
869+
}
870+
871+
if (Args_[0].HasAt || Args_[1].HasAt) {
872+
ctx.Error(Pos_) << "Temporary tables are not supported here";
873+
return nullptr;
874+
}
875+
876+
if (!Args_[1].View.empty()) {
877+
ctx.Error(Pos_) << "VIEW should be used only in first argument";
878+
return nullptr;
879+
}
880+
881+
ExtractTableName(ctx, Args_[0]);
882+
ExtractTableName(ctx, Args_[1]);
883+
auto path = ctx.GetPrefixedPath(Service_, Cluster_, Args_[0].Id);
884+
if (!path) {
885+
return nullptr;
886+
}
887+
TNodePtr key = Y("Key", Q(Y(Q("table"), Y("String", path))));
888+
key = AddView(key, Args_[0].View);
889+
if (!ValidateView(GetPos(), ctx, Service_, Args_[0].View)) {
890+
return nullptr;
891+
}
892+
TDeferredAtom pattern = Args_[1].Id;
893+
return Y(func.EndsWith("strict") ? "MrPartitionsStrict" : "MrPartitions", key, pattern.Build());
852894
}
853895

854896
ctx.Error(Pos_) << "Unknown table name preprocessor: " << Func_;

yt/yql/providers/yt/common/yql_names.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ const TStringBuf MrWalkFoldersImplName = "MrWalkFoldersImpl";
5555
const TStringBuf MrRangeInputListInternal = "MrRangeInputListInternal";
5656
const TStringBuf MrPartitionListName = "MrPartitionList";
5757
const TStringBuf MrPartitionListStrictName = "MrPartitionListStrict";
58+
const TStringBuf MrPartitionsName = "MrPartitions";
59+
const TStringBuf MrPartitionsStrictName = "MrPartitionsStrict";
5860

5961
// YT related names
6062
const TStringBuf READ_SCHEMA_ATTR_NAME = "_read_schema";

yt/yql/providers/yt/provider/ya.make

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ SRCS(
2323
yql_yt_helpers.cpp
2424
yql_yt_intent_determination.cpp
2525
yql_yt_io_discovery.cpp
26+
yql_yt_io_discovery_partitions.cpp
2627
yql_yt_io_discovery_walk_folders.cpp
2728
yql_yt_join_impl.cpp
2829
yql_yt_join_reorder.cpp
@@ -72,6 +73,7 @@ SRCS(
7273
)
7374

7475
PEERDIR(
76+
contrib/libs/re2
7577
library/cpp/yson/node
7678
library/cpp/json/writer
7779
library/cpp/json

yt/yql/providers/yt/provider/yql_yt_io_discovery.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "yql_yt_gateway.h"
44
#include "yql_yt_op_settings.h"
55
#include "yql_yt_helpers.h"
6+
#include "yql_yt_io_discovery_partitions.h"
67
#include "yql_yt_io_discovery_walk_folders.h"
78

89
#include <yt/yql/providers/yt/expr_nodes/yql_yt_expr_nodes.h>
@@ -52,6 +53,7 @@ class TYtIODiscoveryTransformer : public TGraphTransformerBase {
5253
const bool evaluationInProgress = State_->Types->EvaluationInProgress;
5354
TOptimizeExprSettings settings(nullptr);
5455
settings.VisitChanges = true;
56+
bool seenMrPartitions = false;
5557
auto status = OptimizeExpr(input, output, [&](const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr {
5658
if (auto maybeRead = TMaybeNode<TYtRead>(node)) {
5759
if (!maybeRead.DataSource()) { // Validates provider
@@ -69,6 +71,11 @@ class TYtIODiscoveryTransformer : public TGraphTransformerBase {
6971
return {};
7072
}
7173

74+
if (read.Arg(2).Ref().IsCallable({MrPartitionsName, MrPartitionsStrictName})) {
75+
seenMrPartitions = true;
76+
return ExpandMrPartitions(read, ctx, *(State_->Types));
77+
}
78+
7279
if (read.Arg(2).Ref().IsCallable({MrPartitionListName, MrPartitionListStrictName})) {
7380
return ExpandPartitionList(read, ctx);
7481
}
@@ -193,6 +200,12 @@ class TYtIODiscoveryTransformer : public TGraphTransformerBase {
193200
}, ctx, settings);
194201

195202
if (status.Level != TStatus::Ok) {
203+
if (seenMrPartitions && status.Level == TStatus::Repeat) {
204+
ctx.Step
205+
.Repeat(TExprStep::ExpandApplyForLambdas)
206+
.Repeat(TExprStep::ExprEval);
207+
return TStatus(TStatus::Repeat, true);
208+
}
196209
return status;
197210
}
198211

0 commit comments

Comments
 (0)