Skip to content

Commit ab69bb0

Browse files
Chen-Yuan-Lai and Cheng-Yuan-Lai authored
doc-gen: migrate scalar functions (string) documentation 4/4 (#13927)
* doc-gen: migrate scalar functions (string) documentation 4/4 * fix: fix typo and update function docs --------- Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
1 parent 383f279 commit ab69bb0

File tree

9 files changed

+186
-228
lines changed

9 files changed

+186
-228
lines changed

datafusion/functions/src/unicode/left.rs

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::any::Any;
1919
use std::cmp::Ordering;
20-
use std::sync::{Arc, OnceLock};
20+
use std::sync::Arc;
2121

2222
use arrow::array::{
2323
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
@@ -31,12 +31,28 @@ use datafusion_common::cast::{
3131
};
3232
use datafusion_common::exec_err;
3333
use datafusion_common::Result;
34-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
3534
use datafusion_expr::TypeSignature::Exact;
3635
use datafusion_expr::{
3736
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3837
};
38+
use datafusion_macros::user_doc;
3939

40+
#[user_doc(
41+
doc_section(label = "String Functions"),
42+
description = "Returns a specified number of characters from the left side of a string.",
43+
syntax_example = "left(str, n)",
44+
sql_example = r#"```sql
45+
> select left('datafusion', 4);
46+
+-----------------------------------+
47+
| left(Utf8("datafusion"),Int64(4)) |
48+
+-----------------------------------+
49+
| data |
50+
+-----------------------------------+
51+
```"#,
52+
standard_argument(name = "str", prefix = "String"),
53+
argument(name = "n", description = "Number of characters to return."),
54+
related_udf(name = "right")
55+
)]
4056
#[derive(Debug)]
4157
pub struct LeftFunc {
4258
signature: Signature,
@@ -99,36 +115,10 @@ impl ScalarUDFImpl for LeftFunc {
99115
}
100116

101117
fn documentation(&self) -> Option<&Documentation> {
102-
Some(get_left_doc())
118+
self.doc()
103119
}
104120
}
105121

106-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
107-
108-
fn get_left_doc() -> &'static Documentation {
109-
DOCUMENTATION.get_or_init(|| {
110-
Documentation::builder(
111-
DOC_SECTION_STRING,
112-
"Returns a specified number of characters from the left side of a string.",
113-
"left(str, n)",
114-
)
115-
.with_sql_example(
116-
r#"```sql
117-
> select left('datafusion', 4);
118-
+-----------------------------------+
119-
| left(Utf8("datafusion"),Int64(4)) |
120-
+-----------------------------------+
121-
| data |
122-
+-----------------------------------+
123-
```"#,
124-
)
125-
.with_standard_argument("str", Some("String"))
126-
.with_argument("n", "Number of characters to return.")
127-
.with_related_udf("right")
128-
.build()
129-
})
130-
}
131-
132122
/// Returns first n characters in the string, or when n is negative, returns all but last |n| characters.
133123
/// left('abcde', 2) = 'ab'
134124
/// The implementation uses UTF-8 code points as characters

datafusion/functions/src/unicode/lpad.rs

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::any::Any;
1919
use std::fmt::Write;
20-
use std::sync::{Arc, OnceLock};
20+
use std::sync::Arc;
2121

2222
use arrow::array::{
2323
Array, ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
@@ -31,12 +31,32 @@ use crate::strings::StringArrayType;
3131
use crate::utils::{make_scalar_function, utf8_to_str_type};
3232
use datafusion_common::cast::as_int64_array;
3333
use datafusion_common::{exec_err, Result};
34-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
3534
use datafusion_expr::TypeSignature::Exact;
3635
use datafusion_expr::{
3736
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3837
};
38+
use datafusion_macros::user_doc;
3939

40+
#[user_doc(
41+
doc_section(label = "String Functions"),
42+
description = "Pads the left side of a string with another string to a specified string length.",
43+
syntax_example = "lpad(str, n[, padding_str])",
44+
sql_example = r#"```sql
45+
> select lpad('Dolly', 10, 'hello');
46+
+---------------------------------------------+
47+
| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) |
48+
+---------------------------------------------+
49+
| helloDolly |
50+
+---------------------------------------------+
51+
```"#,
52+
standard_argument(name = "str", prefix = "String"),
53+
argument(name = "n", description = "String length to pad to."),
54+
argument(
55+
name = "padding_str",
56+
description = "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._"
57+
),
58+
related_udf(name = "rpad")
59+
)]
4060
#[derive(Debug)]
4161
pub struct LPadFunc {
4262
signature: Signature,
@@ -103,34 +123,10 @@ impl ScalarUDFImpl for LPadFunc {
103123
}
104124

105125
fn documentation(&self) -> Option<&Documentation> {
106-
Some(get_lpad_doc())
126+
self.doc()
107127
}
108128
}
109129

110-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
111-
112-
fn get_lpad_doc() -> &'static Documentation {
113-
DOCUMENTATION.get_or_init(|| {
114-
Documentation::builder(
115-
DOC_SECTION_STRING,
116-
"Pads the left side of a string with another string to a specified string length.",
117-
"lpad(str, n[, padding_str])")
118-
.with_sql_example(r#"```sql
119-
> select lpad('Dolly', 10, 'hello');
120-
+---------------------------------------------+
121-
| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) |
122-
+---------------------------------------------+
123-
| helloDolly |
124-
+---------------------------------------------+
125-
```"#)
126-
.with_standard_argument("str", Some("String"))
127-
.with_argument("n", "String length to pad to.")
128-
.with_argument("padding_str", "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._")
129-
.with_related_udf("rpad")
130-
.build()
131-
})
132-
}
133-
134130
/// Extends the string to length 'length' by prepending the characters fill (a space by default).
135131
/// If the string is already longer than length then it is truncated (on the right).
136132
/// lpad('hi', 5, 'xy') = 'xyxhi'

datafusion/functions/src/unicode/right.rs

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::any::Any;
1919
use std::cmp::{max, Ordering};
20-
use std::sync::{Arc, OnceLock};
20+
use std::sync::Arc;
2121

2222
use arrow::array::{
2323
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
@@ -31,12 +31,28 @@ use datafusion_common::cast::{
3131
};
3232
use datafusion_common::exec_err;
3333
use datafusion_common::Result;
34-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
3534
use datafusion_expr::TypeSignature::Exact;
3635
use datafusion_expr::{
3736
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3837
};
38+
use datafusion_macros::user_doc;
3939

40+
#[user_doc(
41+
doc_section(label = "String Functions"),
42+
description = "Returns a specified number of characters from the right side of a string.",
43+
syntax_example = "right(str, n)",
44+
sql_example = r#"```sql
45+
> select right('datafusion', 6);
46+
+------------------------------------+
47+
| right(Utf8("datafusion"),Int64(6)) |
48+
+------------------------------------+
49+
| fusion |
50+
+------------------------------------+
51+
```"#,
52+
standard_argument(name = "str", prefix = "String"),
53+
argument(name = "n", description = "Number of characters to return."),
54+
related_udf(name = "left")
55+
)]
4056
#[derive(Debug)]
4157
pub struct RightFunc {
4258
signature: Signature,
@@ -99,36 +115,10 @@ impl ScalarUDFImpl for RightFunc {
99115
}
100116

101117
fn documentation(&self) -> Option<&Documentation> {
102-
Some(get_right_doc())
118+
self.doc()
103119
}
104120
}
105121

106-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
107-
108-
fn get_right_doc() -> &'static Documentation {
109-
DOCUMENTATION.get_or_init(|| {
110-
Documentation::builder(
111-
DOC_SECTION_STRING,
112-
"Returns a specified number of characters from the right side of a string.",
113-
"right(str, n)",
114-
)
115-
.with_sql_example(
116-
r#"```sql
117-
> select right('datafusion', 6);
118-
+------------------------------------+
119-
| right(Utf8("datafusion"),Int64(6)) |
120-
+------------------------------------+
121-
| fusion |
122-
+------------------------------------+
123-
```"#,
124-
)
125-
.with_standard_argument("str", Some("String"))
126-
.with_argument("n", "Number of characters to return")
127-
.with_related_udf("left")
128-
.build()
129-
})
130-
}
131-
132122
/// Returns last n characters in the string, or when n is negative, returns all but first |n| characters.
133123
/// right('abcde', 2) = 'de'
134124
/// The implementation uses UTF-8 code points as characters

datafusion/functions/src/unicode/rpad.rs

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,37 @@ use arrow::datatypes::DataType;
2525
use datafusion_common::cast::as_int64_array;
2626
use datafusion_common::DataFusionError;
2727
use datafusion_common::{exec_err, Result};
28-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
2928
use datafusion_expr::TypeSignature::Exact;
3029
use datafusion_expr::{
3130
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3231
};
32+
use datafusion_macros::user_doc;
3333
use std::any::Any;
3434
use std::fmt::Write;
35-
use std::sync::{Arc, OnceLock};
35+
use std::sync::Arc;
3636
use unicode_segmentation::UnicodeSegmentation;
3737
use DataType::{LargeUtf8, Utf8, Utf8View};
3838

39+
#[user_doc(
40+
doc_section(label = "String Functions"),
41+
description = "Pads the right side of a string with another string to a specified string length.",
42+
syntax_example = "rpad(str, n[, padding_str])",
43+
sql_example = r#"```sql
44+
> select rpad('datafusion', 20, '_-');
45+
+-----------------------------------------------+
46+
| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) |
47+
+-----------------------------------------------+
48+
| datafusion_-_-_-_-_- |
49+
+-----------------------------------------------+
50+
```"#,
51+
standard_argument(name = "str", prefix = "String"),
52+
argument(name = "n", description = "String length to pad to."),
53+
argument(
54+
name = "padding_str",
55+
description = "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._"
56+
),
57+
related_udf(name = "lpad")
58+
)]
3959
#[derive(Debug)]
4060
pub struct RPadFunc {
4161
signature: Signature,
@@ -122,38 +142,10 @@ impl ScalarUDFImpl for RPadFunc {
122142
}
123143

124144
fn documentation(&self) -> Option<&Documentation> {
125-
Some(get_rpad_doc())
145+
self.doc()
126146
}
127147
}
128148

129-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
130-
131-
fn get_rpad_doc() -> &'static Documentation {
132-
DOCUMENTATION.get_or_init(|| {
133-
Documentation::builder(
134-
DOC_SECTION_STRING,
135-
"Pads the right side of a string with another string to a specified string length.",
136-
"rpad(str, n[, padding_str])")
137-
.with_sql_example(r#"```sql
138-
> select rpad('datafusion', 20, '_-');
139-
+-----------------------------------------------+
140-
| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) |
141-
+-----------------------------------------------+
142-
| datafusion_-_-_-_-_- |
143-
+-----------------------------------------------+
144-
```"#)
145-
.with_standard_argument(
146-
"str",
147-
Some("String"),
148-
)
149-
.with_argument("n", "String length to pad to.")
150-
.with_argument("padding_str",
151-
"String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._")
152-
.with_related_udf("lpad")
153-
.build()
154-
})
155-
}
156-
157149
pub fn rpad<StringArrayLen: OffsetSizeTrait, FillArrayLen: OffsetSizeTrait>(
158150
args: &[ArrayRef],
159151
) -> Result<ArrayRef> {

datafusion/functions/src/unicode/strpos.rs

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,34 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::sync::{Arc, OnceLock};
19+
use std::sync::Arc;
2020

2121
use crate::strings::StringArrayType;
2222
use crate::utils::{make_scalar_function, utf8_to_int_type};
2323
use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
2424
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
2525
use datafusion_common::{exec_err, Result};
26-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
2726
use datafusion_expr::{
2827
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
2928
};
29+
use datafusion_macros::user_doc;
3030

31+
#[user_doc(
32+
doc_section(label = "String Functions"),
33+
description = "Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0.",
34+
syntax_example = "strpos(str, substr)",
35+
alternative_syntax = "position(substr in origstr)",
36+
sql_example = r#"```sql
37+
> select strpos('datafusion', 'fus');
38+
+----------------------------------------+
39+
| strpos(Utf8("datafusion"),Utf8("fus")) |
40+
+----------------------------------------+
41+
| 5 |
42+
+----------------------------------------+
43+
```"#,
44+
standard_argument(name = "str", prefix = "String"),
45+
argument(name = "substr", description = "Substring expression to search for.")
46+
)]
3147
#[derive(Debug)]
3248
pub struct StrposFunc {
3349
signature: Signature,
@@ -79,33 +95,10 @@ impl ScalarUDFImpl for StrposFunc {
7995
}
8096

8197
fn documentation(&self) -> Option<&Documentation> {
82-
Some(get_strpos_doc())
98+
self.doc()
8399
}
84100
}
85101

86-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
87-
88-
fn get_strpos_doc() -> &'static Documentation {
89-
DOCUMENTATION.get_or_init(|| {
90-
Documentation::builder(
91-
DOC_SECTION_STRING,
92-
"Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0.",
93-
"strpos(str, substr)")
94-
.with_sql_example(r#"```sql
95-
> select strpos('datafusion', 'fus');
96-
+----------------------------------------+
97-
| strpos(Utf8("datafusion"),Utf8("fus")) |
98-
+----------------------------------------+
99-
| 5 |
100-
+----------------------------------------+
101-
```"#)
102-
.with_standard_argument("str", Some("String"))
103-
.with_argument("substr", "Substring expression to search for.")
104-
.with_alternative_syntax("position(substr in origstr)")
105-
.build()
106-
})
107-
}
108-
109102
fn strpos(args: &[ArrayRef]) -> Result<ArrayRef> {
110103
match (args[0].data_type(), args[1].data_type()) {
111104
(DataType::Utf8, DataType::Utf8) => {

0 commit comments

Comments
 (0)