|
84 | 84 | import java.util.List; |
85 | 85 | import java.util.Map; |
86 | 86 | import java.util.Set; |
87 | | -import java.util.function.Function; |
88 | 87 | import java.util.stream.Collectors; |
89 | 88 | import java.util.stream.Stream; |
90 | 89 |
|
@@ -755,222 +754,6 @@ private void resolveInferences( |
755 | 754 | inferenceRunner.resolveInferenceIds(inferencePlans, l.map(preAnalysisResult::withInferenceResolution)); |
756 | 755 | } |
757 | 756 |
|
758 | | - static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicyMatchFields, PreAnalysisResult result) { |
759 | | - List<LogicalPlan> inlinestats = parsed.collect(InlineStats.class::isInstance); |
760 | | - Set<Aggregate> inlinestatsAggs = new HashSet<>(); |
761 | | - for (var i : inlinestats) { |
762 | | - inlinestatsAggs.add(((InlineStats) i).aggregate()); |
763 | | - } |
764 | | - |
765 | | - if (false == parsed.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs))) { |
766 | | - // no explicit columns selection, for example "from employees" |
767 | | - // also, inlinestats only adds columns to the existent output, its Aggregate shouldn't interfere with potentially using "*" |
768 | | - return result.withFieldNames(IndexResolver.ALL_FIELDS); |
769 | | - } |
770 | | - |
771 | | - Holder<Boolean> projectAll = new Holder<>(false); |
772 | | - parsed.forEachExpressionDown(UnresolvedStar.class, us -> {// explicit "*" fields selection |
773 | | - if (projectAll.get()) { |
774 | | - return; |
775 | | - } |
776 | | - projectAll.set(true); |
777 | | - }); |
778 | | - |
779 | | - if (projectAll.get()) { |
780 | | - return result.withFieldNames(IndexResolver.ALL_FIELDS); |
781 | | - } |
782 | | - |
783 | | - var referencesBuilder = new Holder<>(AttributeSet.builder()); |
784 | | - // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some |
785 | | - // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the |
786 | | - // wildcards. |
787 | | - // |
788 | | - // E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for |
789 | | - // "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for |
790 | | - // |
791 | | - // NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder |
792 | | - // Except in KEEP and DROP. |
793 | | - var keepRefs = AttributeSet.builder(); |
794 | | - var dropWildcardRefs = AttributeSet.builder(); |
795 | | - // fields required to request for lookup joins to work |
796 | | - var joinRefs = AttributeSet.builder(); |
797 | | - // lookup indices where we request "*" because we may require all their fields |
798 | | - Set<String> wildcardJoinIndices = new java.util.HashSet<>(); |
799 | | - |
800 | | - var canRemoveAliases = new Holder<>(true); |
801 | | - |
802 | | - var processingLambda = new Holder<Function<LogicalPlan, Boolean>>(); |
803 | | - processingLambda.set((LogicalPlan p) -> {// go over each plan top-down |
804 | | - if (p instanceof Fork fork) { |
805 | | - // Early return from forEachDown. We will iterate over the children manually. |
806 | | - var forkRefsResult = AttributeSet.builder(); |
807 | | - forkRefsResult.addAll(referencesBuilder.get()); |
808 | | - |
809 | | - for (var child : fork.children()) { |
810 | | - referencesBuilder.set(AttributeSet.builder()); |
811 | | - var return_result = child.forEachDownMayReturnEarly(processingLambda.get()); |
812 | | - // No nested Forks for now... |
813 | | - assert return_result; |
814 | | - forkRefsResult.addAll(referencesBuilder.get()); |
815 | | - } |
816 | | - |
817 | | - forkRefsResult.removeIf(attr -> attr.name().equals(Fork.FORK_FIELD)); |
818 | | - referencesBuilder.set(forkRefsResult); |
819 | | - return false; |
820 | | - } else if (p instanceof RegexExtract re) { // for Grok and Dissect |
821 | | - // keep the inputs needed by Grok/Dissect |
822 | | - referencesBuilder.get().addAll(re.input().references()); |
823 | | - } else if (p instanceof Enrich enrich) { |
824 | | - AttributeSet enrichFieldRefs = Expressions.references(enrich.enrichFields()); |
825 | | - AttributeSet.Builder enrichRefs = enrichFieldRefs.combine(enrich.matchField().references()).asBuilder(); |
826 | | - // Enrich adds an EmptyAttribute if no match field is specified |
827 | | - // The exact name of the field will be added later as part of enrichPolicyMatchFields Set |
828 | | - enrichRefs.removeIf(attr -> attr instanceof EmptyAttribute); |
829 | | - referencesBuilder.get().addAll(enrichRefs); |
830 | | - } else if (p instanceof LookupJoin join) { |
831 | | - if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { |
832 | | - joinRefs.addAll(usingJoinType.columns()); |
833 | | - } |
834 | | - if (keepRefs.isEmpty()) { |
835 | | - // No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution |
836 | | - wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern()); |
837 | | - } else { |
838 | | - // Keep commands can reference the join columns with names that shadow aliases, so we block their removal |
839 | | - joinRefs.addAll(keepRefs); |
840 | | - } |
841 | | - } else { |
842 | | - referencesBuilder.get().addAll(p.references()); |
843 | | - if (p instanceof UnresolvedRelation ur && ur.indexMode() == IndexMode.TIME_SERIES) { |
844 | | - // METRICS aggs generally rely on @timestamp without the user having to mention it. |
845 | | - referencesBuilder.get().add(new UnresolvedAttribute(ur.source(), MetadataAttribute.TIMESTAMP_FIELD)); |
846 | | - } |
847 | | - // special handling for UnresolvedPattern (which is not an UnresolvedAttribute) |
848 | | - p.forEachExpression(UnresolvedNamePattern.class, up -> { |
849 | | - var ua = new UnresolvedAttribute(up.source(), up.name()); |
850 | | - referencesBuilder.get().add(ua); |
851 | | - if (p instanceof Keep) { |
852 | | - keepRefs.add(ua); |
853 | | - } else if (p instanceof Drop) { |
854 | | - dropWildcardRefs.add(ua); |
855 | | - } else { |
856 | | - throw new IllegalStateException("Only KEEP and DROP should allow wildcards"); |
857 | | - } |
858 | | - }); |
859 | | - if (p instanceof Keep) { |
860 | | - keepRefs.addAll(p.references()); |
861 | | - } |
862 | | - } |
863 | | - |
864 | | - // If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of |
865 | | - // command that we may add in the future which can override already defined Aliases with EVAL |
866 | | - // (for example |
867 | | - // |
868 | | - // from test |
869 | | - // | eval ip = 123 |
870 | | - // | enrich ips_policy ON hostname |
871 | | - // | rename ip AS my_ip |
872 | | - // |
873 | | - // and ips_policy enriches the results with the same name ip field), |
874 | | - // these aliases should be kept in the list of fields. |
875 | | - if (canRemoveAliases.get() && p.anyMatch(EsqlSession::couldOverrideAliases)) { |
876 | | - canRemoveAliases.set(false); |
877 | | - } |
878 | | - if (canRemoveAliases.get()) { |
879 | | - // remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree |
880 | | - // for example "from test | eval x = salary | stats max = max(x) by gender" |
881 | | - // remove the UnresolvedAttribute "x", since that is an Alias defined in "eval" |
882 | | - // also remove other down-the-tree references to the extracted fields from "grok" and "dissect" |
883 | | - AttributeSet planRefs = p.references(); |
884 | | - Set<String> fieldNames = planRefs.names(); |
885 | | - p.forEachExpressionDown(NamedExpression.class, ne -> { |
886 | | - if ((ne instanceof Alias || ne instanceof ReferenceAttribute) == false) { |
887 | | - return; |
888 | | - } |
889 | | - // do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id" |
890 | | - // or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)" |
891 | | - if (fieldNames.contains(ne.name())) { |
892 | | - return; |
893 | | - } |
894 | | - referencesBuilder.get() |
895 | | - .removeIf(attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))); |
896 | | - }); |
897 | | - } |
898 | | - |
899 | | - // No early return. |
900 | | - return true; |
901 | | - }); |
902 | | - parsed.forEachDownMayReturnEarly(processingLambda.get()); |
903 | | - |
904 | | - // Add JOIN ON column references afterward to avoid Alias removal |
905 | | - referencesBuilder.get().addAll(joinRefs); |
906 | | - // If any JOIN commands need wildcard field-caps calls, persist the index names |
907 | | - if (wildcardJoinIndices.isEmpty() == false) { |
908 | | - result = result.withWildcardJoinIndices(wildcardJoinIndices); |
909 | | - } |
910 | | - |
911 | | - // remove valid metadata attributes because they will be filtered out by the IndexResolver anyway |
912 | | - // otherwise, in some edge cases, we will fail to ask for "*" (all fields) instead |
913 | | - referencesBuilder.get().removeIf(a -> a instanceof MetadataAttribute || MetadataAttribute.isSupported(a.name())); |
914 | | - Set<String> fieldNames = referencesBuilder.get().build().names(); |
915 | | - |
916 | | - if (fieldNames.isEmpty() && enrichPolicyMatchFields.isEmpty()) { |
917 | | - // there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index |
918 | | - return result.withFieldNames(IndexResolver.INDEX_METADATA_FIELD); |
919 | | - } else { |
920 | | - fieldNames.addAll(subfields(fieldNames)); |
921 | | - fieldNames.addAll(enrichPolicyMatchFields); |
922 | | - fieldNames.addAll(subfields(enrichPolicyMatchFields)); |
923 | | - return result.withFieldNames(fieldNames); |
924 | | - } |
925 | | - } |
926 | | - |
927 | | - /** |
928 | | - * Indicates whether the given plan gives an exact list of fields that we need to collect from field_caps. |
929 | | - */ |
930 | | - private static boolean shouldCollectReferencedFields(LogicalPlan plan, Set<Aggregate> inlinestatsAggs) { |
931 | | - return plan instanceof Project || (plan instanceof Aggregate agg && inlinestatsAggs.contains(agg) == false); |
932 | | - } |
933 | | - |
934 | | - /** |
935 | | - * Could a plan "accidentally" override aliases? |
936 | | - * Examples are JOIN and ENRICH, that _could_ produce fields with the same |
937 | | - * name of an existing alias, based on their index mapping. |
938 | | - * Here we just have to consider commands where this information is not available before index resolution, |
939 | | - * eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know. |
940 | | - */ |
941 | | - private static boolean couldOverrideAliases(LogicalPlan p) { |
942 | | - return (p instanceof Aggregate |
943 | | - || p instanceof Completion |
944 | | - || p instanceof Drop |
945 | | - || p instanceof Eval |
946 | | - || p instanceof Filter |
947 | | - || p instanceof Fork |
948 | | - || p instanceof InlineStats |
949 | | - || p instanceof Insist |
950 | | - || p instanceof Keep |
951 | | - || p instanceof Limit |
952 | | - || p instanceof MvExpand |
953 | | - || p instanceof OrderBy |
954 | | - || p instanceof Project |
955 | | - || p instanceof RegexExtract |
956 | | - || p instanceof Rename |
957 | | - || p instanceof TopN |
958 | | - || p instanceof UnresolvedRelation) == false; |
959 | | - } |
960 | | - |
961 | | - private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) { |
962 | | - boolean isPattern = Regex.isSimpleMatchPattern(attr.name()); |
963 | | - if (skipIfPattern && isPattern) { |
964 | | - return false; |
965 | | - } |
966 | | - var name = attr.name(); |
967 | | - return isPattern ? Regex.simpleMatch(name, other) : name.equals(other); |
968 | | - } |
969 | | - |
970 | | - private static Set<String> subfields(Set<String> names) { |
971 | | - return names.stream().filter(name -> name.endsWith(WILDCARD) == false).map(name -> name + ".*").collect(Collectors.toSet()); |
972 | | - } |
973 | | - |
974 | 757 | private PhysicalPlan logicalPlanToPhysicalPlan(LogicalPlan optimizedPlan, EsqlQueryRequest request) { |
975 | 758 | PhysicalPlan physicalPlan = optimizedPhysicalPlan(optimizedPlan); |
976 | 759 | physicalPlan = physicalPlan.transformUp(FragmentExec.class, f -> { |
|
0 commit comments