# flink-sql-runner/stdlib-docs/library-functions.yml (DataSQRL/flink-sql-runner, main branch)
#
# Copyright © 2026 DataSQRL (contact@datasqrl.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Documentation entries for the `math` function library. Each item gives the
# function signature (`name`), a prose `description` that includes a usage
# example, and the function's `identifier`.
# Descriptions use folded scalars (`>-`): the wrapped lines are joined with
# single spaces, so each value is still one single-line string.
math:
  - name: cbrt(double)
    description: >-
      Calculates the cube root of a number. For example, `cbrt(27.0)` returns
      `3.0`, which is the cube root of 27.0.
    identifier: cbrt
  - name: copy_sign(double, double)
    description: >-
      Returns the first argument with the sign of the second argument. For
      example, `copy_sign(2.0, -3.0)` returns `-2.0`.
    identifier: copy_sign
  - name: expm1(double)
    description: >-
      Calculates e^x - 1 with better precision for small values. For example,
      `expm1(0.0)` returns `0.0`, as `e^0 - 1 = 0`.
    identifier: expm1
  - name: hypot(double, double)
    description: >-
      Computes sqrt(x² + y²) without intermediate overflow or underflow. For
      example, `hypot(3.0, 4.0)` returns `5.0`, which is the hypotenuse of a
      3-4-5 triangle.
    identifier: hypot
  - name: log1p(double)
    description: >-
      Computes the natural logarithm of 1 + x (log(1 + x)) accurately for
      small x. For example, `log1p(0.0)` returns `0.0` as `log(1 + 0) = 0`.
    identifier: log1p
  - name: next_after(double, double)
    description: >-
      Returns the next floating-point number towards the direction of the
      second argument. For example, `next_after(1.0, 2.0)` returns the next
      representable number after 1.0.
    identifier: next_after
  - name: scalb(double, bigint)
    description: >-
      Multiplies a floating-point number by 2 raised to the power of an
      integer. For example, `scalb(1.0, 3)` returns `8.0` as
      `1.0 * 2^3 = 8.0`.
    identifier: scalb
  - name: ulp(double)
    description: >-
      Returns the size of the unit in the last place (ULP) of the argument.
      For example, `ulp(1.0)` returns the ULP of 1.0.
    identifier: ulp
  - name: binomial_distribution(bigint, double, bigint)
    description: >-
      Calculates the probability of obtaining a number of successes in a
      fixed number of trials for a binomial distribution. For example,
      `binomial_distribution(10, 0.5, 5)` returns the probability of 5
      successes out of 10 trials with a 50% success rate.
    identifier: binomial_distribution
  - name: exponential_distribution(double, double)
    description: >-
      Evaluates the probability density or cumulative distribution of an
      exponential distribution. For example,
      `exponential_distribution(1.0, 2.0)` returns the exponential
      distribution's probability for a given rate and time.
    identifier: exponential_distribution
  - name: normal_distribution(double, double, double)
    description: >-
      Evaluates the cumulative distribution function for a normal (Gaussian)
      distribution. For example, `normal_distribution(0.0, 1.0, 1.0)` returns
      the probability for a standard normal distribution with mean 0 and
      standard deviation 1.
    identifier: normal_distribution
  - name: poisson_distribution(double, bigint)
    description: >-
      Evaluates the probability mass function of a Poisson-distributed random
      variable. For example, `poisson_distribution(1.0, 5)` returns the
      probability of observing 5 events when the average event rate is 1.0.
    identifier: poisson_distribution

# Documentation entries for the `iceberg` function library. Signatures here
# list both the type and the name of every parameter.
# Long values use folded scalars (`>-`): the wrapped lines are joined with
# single spaces, so each value is still one single-line string.
iceberg:
  - name: >-
      read_partition_sizes(String warehouse, String catalogType,
      String catalogName, String databaseName, String tableName)
    description: >-
      Calculates the total size on disk in bytes for each partition in an
      Iceberg table.
    identifier: read_partition_sizes
  - name: >-
      delete_duplicated_data(String warehouse, String catalogType,
      String catalogName, String databaseName, String tableName,
      Long maxTimeBucket, Map[Map[String, String], Integer] partitionSet)
    description: >-
      Deletes duplicated data from an Iceberg table based on partition
      specifications and time bucket constraints.
    identifier: delete_duplicated_data

# Documentation entries for the `openai` function library.
# - `completions` and `extract_json` are listed once per argument-list
#   variant; all variants of a function share the same `identifier`.
# - Every entry carries the same `requirement`: the OPENAI_API_KEY
#   environment variable must be set.
# - `maxOutputTokens` limits output *tokens*, so the examples say "limited to
#   100 tokens" (they previously said "characters", contradicting both the
#   parameter name and the first sentence of each description).
# Descriptions use folded scalars (`>-`): the wrapped lines are joined with
# single spaces, so each value is still one single-line string.
openai:
  - name: completions(String prompt, String model_name)
    description: >-
      Generates a completion for the given prompt using the specified OpenAI
      model. For example, `completions('What is AI?', 'gpt-4o')` returns a
      possible response to the prompt.
    identifier: completions
    requirement: Set OPENAI_API_KEY environment variable
  - name: completions(String prompt, String model_name, Integer maxOutputTokens)
    description: >-
      Generates a completion for the given prompt using the specified OpenAI
      model, with an upper limit on the number of output tokens. For example,
      `completions('What is AI?', 'gpt-4o', 100)` returns a possible response
      to the prompt, limited to 100 tokens.
    identifier: completions
    requirement: Set OPENAI_API_KEY environment variable
  - name: completions(String prompt, String model_name, Integer maxOutputTokens, Double temperature)
    description: >-
      Generates a completion for the given prompt using the specified OpenAI
      model, with an upper limit on the number of output tokens and a
      specified temperature. For example,
      `completions('What is AI?', 'gpt-4o', 100, 0.5)` returns a possible
      response to the prompt, limited to 100 tokens and weighted by a
      temperature of 0.5.
    identifier: completions
    requirement: Set OPENAI_API_KEY environment variable
  - name: completions(String prompt, String model_name, Integer maxOutputTokens, Double temperature, Double topP)
    description: >-
      Generates a completion for the given prompt using the specified OpenAI
      model, with an upper limit on the number of output tokens, a specified
      temperature, and a specified top-p value. For example,
      `completions('What is AI?', 'gpt-4o', 100, 0.5, 0.9)` returns a
      possible response to the prompt, limited to 100 tokens, weighted by a
      temperature of 0.5, and with a top-p value of 0.9.
    identifier: completions
    requirement: Set OPENAI_API_KEY environment variable
  - name: extract_json(String prompt, String model_name)
    description: >-
      Extracts JSON data from the given prompt using the specified OpenAI
      model. For example, `extract_json('What is AI?', 'gpt-4o')` returns any
      relevant JSON data for the prompt.
    identifier: extract_json
    requirement: Set OPENAI_API_KEY environment variable
  - name: extract_json(String prompt, String model_name, Double temperature)
    description: >-
      Extracts JSON data from the given prompt using the specified OpenAI
      model and a specified temperature. For example,
      `extract_json('What is AI?', 'gpt-4o', 0.5)` returns any relevant JSON
      data for the prompt, weighted by a temperature of 0.5.
    identifier: extract_json
    requirement: Set OPENAI_API_KEY environment variable
  - name: extract_json(String prompt, String model_name, Double temperature, Double topP)
    description: >-
      Extracts JSON data from the given prompt using the specified OpenAI
      model, with a specified temperature and top-p value. For example,
      `extract_json('What is AI?', 'gpt-4o', 0.5, 0.9)` returns any relevant
      JSON data for the prompt, weighted by a temperature of 0.5 and with a
      top-p value of 0.9.
    identifier: extract_json
    requirement: Set OPENAI_API_KEY environment variable
  - name: vector_embed(String text, String model_name)
    description: >-
      Embeds the given text into a vector using the specified OpenAI model.
      For example, `vector_embed('What is AI?', 'text-embedding-ada-002')`
      returns a vector representation of the text.
    identifier: vector_embed
    requirement: Set OPENAI_API_KEY environment variable