|
17 | 17 |
|
18 | 18 | package org.apache.spark.sql.sources.v2;
|
19 | 19 |
|
| 20 | +import java.io.IOException; |
20 | 21 | import java.util.HashMap;
|
21 | 22 | import java.util.Locale;
|
22 | 23 | import java.util.Map;
|
23 | 24 | import java.util.Optional;
|
| 25 | +import java.util.stream.Stream; |
| 26 | + |
| 27 | +import com.fasterxml.jackson.databind.ObjectMapper; |
24 | 28 |
|
25 | 29 | import org.apache.spark.annotation.InterfaceStability;
|
26 | 30 |
|
27 | 31 | /**
|
28 | 32 | * An immutable string-to-string map in which keys are case-insensitive. This is used to represent
|
29 | 33 | * data source options.
|
| 34 | + * |
| 35 | + * Each data source implementation can define its own options and teach its users how to set them. |
| 36 | + * Spark doesn't have any restrictions about what options a data source should or should not have. |
| 37 | + * Instead Spark defines some standard options that data sources can optionally adopt. It's possible |
| 38 | + * that some options are very common and many data sources use them. However, different data |
| 39 | + * sources may define the common options (key and meaning) differently, which is quite confusing to |
| 40 | + * end users. |
| 41 | + * |
| 42 | + * The standard options defined by Spark: |
| 43 | + * <table summary="standard data source options"> |
| 44 | + * <tr> |
| 45 | + * <th><b>Option key</b></th> |
| 46 | + * <th><b>Option value</b></th> |
| 47 | + * </tr> |
| 48 | + * <tr> |
| 49 | + * <td>path</td> |
| 50 | + * <td>A path string of the data files/directories, like |
| 51 | + * <code>path1</code>, <code>/absolute/file2</code>, <code>path3/*</code>. The path can |
| 52 | + * be either relative or absolute, can point to either a file or a directory, and can |
| 53 | + * contain wildcards. This option is commonly used by file-based data sources.</td> |
| 54 | + * </tr> |
| 55 | + * <tr> |
| 56 | + * <td>paths</td> |
| 57 | + * <td>A JSON array style paths string of the data files/directories, like |
| 58 | + * <code>["path1", "/absolute/file2"]</code>. The format of each path is the same as the |
| 59 | + * <code>path</code> option, plus it should follow JSON string literal format, e.g. quotes |
| 60 | + * should be escaped, <code>pa\"th</code> means pa"th. |
| 61 | + * </td> |
| 62 | + * </tr> |
| 63 | + * <tr> |
| 64 | + * <td>table</td> |
| 65 | + * <td>A table name string representing the table name directly without any interpretation. |
| 66 | + * For example, <code>db.tbl</code> means a table called db.tbl, not a table called tbl |
| 67 | + * inside database db. <code>`t*b.l`</code> means a table called `t*b.l`, not t*b.l.</td> |
| 68 | + * </tr> |
| 69 | + * <tr> |
| 70 | + * <td>database</td> |
| 71 | + * <td>A database name string representing the database name directly without any |
| 72 | + * interpretation, which is very similar to the table name option.</td> |
| 73 | + * </tr> |
| 74 | + * </table> |
30 | 75 | */
|
31 | 76 | @InterfaceStability.Evolving
|
32 | 77 | public class DataSourceOptions {
|
@@ -97,4 +142,59 @@ public double getDouble(String key, double defaultValue) {
|
97 | 142 | return keyLowerCasedMap.containsKey(lcaseKey) ?
|
98 | 143 | Double.parseDouble(keyLowerCasedMap.get(lcaseKey)) : defaultValue;
|
99 | 144 | }
|
| 145 | + |
| 146 | + /** |
| 147 | + * The option key for singular path. |
| 148 | + */ |
| 149 | + public static final String PATH_KEY = "path"; |
| 150 | + |
| 151 | + /** |
| 152 | + * The option key for multiple paths. |
| 153 | + */ |
| 154 | + public static final String PATHS_KEY = "paths"; |
| 155 | + |
| 156 | + /** |
| 157 | + * The option key for table name. |
| 158 | + */ |
| 159 | + public static final String TABLE_KEY = "table"; |
| 160 | + |
| 161 | + /** |
| 162 | + * The option key for database name. |
| 163 | + */ |
| 164 | + public static final String DATABASE_KEY = "database"; |
| 165 | + |
| 166 | + /** |
| 167 | + * Returns all the paths specified by both the singular path option and the multiple |
| 168 | + * paths option. |
| 169 | + */ |
| 170 | + public String[] paths() { |
| 171 | + String[] singularPath = |
| 172 | + get(PATH_KEY).map(s -> new String[]{s}).orElseGet(() -> new String[0]); |
| 173 | + Optional<String> pathsStr = get(PATHS_KEY); |
| 174 | + if (pathsStr.isPresent()) { |
| 175 | + ObjectMapper objectMapper = new ObjectMapper(); |
| 176 | + try { |
| 177 | + String[] paths = objectMapper.readValue(pathsStr.get(), String[].class); |
| 178 | + return Stream.of(singularPath, paths).flatMap(Stream::of).toArray(String[]::new); |
| 179 | + } catch (IOException e) { |
| 180 | + return singularPath; |
| 181 | + } |
| 182 | + } else { |
| 183 | + return singularPath; |
| 184 | + } |
| 185 | + } |
| 186 | + |
| 187 | + /** |
| 188 | + * Returns the value of the table name option. |
| 189 | + */ |
| 190 | + public Optional<String> tableName() { |
| 191 | + return get(TABLE_KEY); |
| 192 | + } |
| 193 | + |
| 194 | + /** |
| 195 | + * Returns the value of the database name option. |
| 196 | + */ |
| 197 | + public Optional<String> databaseName() { |
| 198 | + return get(DATABASE_KEY); |
| 199 | + } |
100 | 200 | }
|
0 commit comments