Skip to content

Commit 09f96a4

Browse files
committed
apply review suggestions
1 parent ad3a2ff commit 09f96a4

File tree

7 files changed

+79
-129
lines changed

7 files changed

+79
-129
lines changed

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import java.util.Optional;
3434

3535
import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith;
36+
import static java.util.Objects.requireNonNull;
3637

3738
public class ClpPlanOptimizer
3839
implements ConnectorPlanOptimizer
@@ -44,8 +45,8 @@ public class ClpPlanOptimizer
4445
public ClpPlanOptimizer(FunctionMetadataManager functionManager,
4546
StandardFunctionResolution functionResolution)
4647
{
47-
this.functionManager = functionManager;
48-
this.functionResolution = functionResolution;
48+
this.functionManager = requireNonNull(functionManager, "functionManager is null");
49+
this.functionResolution = requireNonNull(functionResolution, "functionResolution is null");
4950
}
5051

5152
@Override

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,16 @@ public class ClpSplit
3434
{
3535
private final SchemaTableName schemaTableName;
3636
private final String archivePath;
37-
private final Optional<String> query;
37+
private final Optional<String> kqlQuery;
3838

3939
@JsonCreator
4040
public ClpSplit(@JsonProperty("schemaTableName") @Nullable SchemaTableName schemaTableName,
4141
@JsonProperty("archivePath") @Nullable String archivePath,
42-
@JsonProperty("query") Optional<String> query)
42+
@JsonProperty("kqlQuery") Optional<String> kqlQuery)
4343
{
4444
this.schemaTableName = schemaTableName;
4545
this.archivePath = archivePath;
46-
this.query = query;
46+
this.kqlQuery = kqlQuery;
4747
}
4848

4949
@JsonProperty
@@ -60,9 +60,9 @@ public String getArchivePath()
6060
}
6161

6262
@JsonProperty
63-
public Optional<String> getQuery()
63+
public Optional<String> getKqlQuery()
6464
{
65-
return query;
65+
return kqlQuery;
6666
}
6767

6868
@Override

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplitManager.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323

2424
import javax.inject.Inject;
2525

26+
import static java.util.Objects.requireNonNull;
27+
2628
public class ClpSplitManager
2729
implements ConnectorSplitManager
2830
{
@@ -31,7 +33,7 @@ public class ClpSplitManager
3133
@Inject
3234
public ClpSplitManager(ClpSplitProvider clpSplitProvider)
3335
{
34-
this.clpSplitProvider = clpSplitProvider;
36+
this.clpSplitProvider = requireNonNull(clpSplitProvider, "clpSplitProvider is null");
3537
}
3638

3739
@Override

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@ public class ClpTableLayoutHandle
2424
implements ConnectorTableLayoutHandle
2525
{
2626
private final ClpTableHandle table;
27-
private final Optional<String> query;
27+
private final Optional<String> kqlQuery;
2828

2929
@JsonCreator
3030
public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table,
31-
@JsonProperty("query") Optional<String> query)
31+
@JsonProperty("query") Optional<String> kqlQuery)
3232
{
3333
this.table = table;
34-
this.query = query;
34+
this.kqlQuery = kqlQuery;
3535
}
3636

3737
@JsonProperty
@@ -41,9 +41,9 @@ public ClpTableHandle getTable()
4141
}
4242

4343
@JsonProperty
44-
public Optional<String> getQuery()
44+
public Optional<String> getKqlQuery()
4545
{
46-
return query;
46+
return kqlQuery;
4747
}
4848

4949
@Override

presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ public List<ClpSplit> listSplits(ClpTableLayoutHandle clpTableLayoutHandle)
9595
while (resultSet.next()) {
9696
final String archiveId = resultSet.getString("archive_id");
9797
final String archivePath = tablePath + "/" + archiveId;
98-
splits.add(new ClpSplit(tableSchemaName, archivePath, clpTableLayoutHandle.getQuery()));
98+
splits.add(new ClpSplit(tableSchemaName, archivePath, clpTableLayoutHandle.getKqlQuery()));
9999
}
100100
}
101101
}

presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ public void testListSplits()
133133
assertEquals(splits.size(), NUM_SPLITS);
134134
for (int i = 0; i < NUM_SPLITS; i++) {
135135
assertEquals(splits.get(i).getArchivePath(), "/tmp/archives/" + tableName + "/id_" + i);
136-
assertEquals(splits.get(i).getQuery(), Optional.empty());
136+
assertEquals(splits.get(i).getKqlQuery(), Optional.empty());
137137
}
138138
}
139139
}

presto-docs/src/main/sphinx/connector/clp.rst

Lines changed: 61 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
=======================
1+
=============
22
CLP Connector
3-
=======================
3+
=============
44

55
.. contents::
66
:local:
@@ -10,7 +10,7 @@ CLP Connector
1010
Overview
1111
--------
1212

13-
The CLP Connector enables SQL-based querying of CLP-S archives from Presto. This document describes how to setup the
13+
The CLP Connector enables SQL-based querying of CLP-S archives from Presto. This document describes how to set up the
1414
CLP Connector to run SQL queries.
1515

1616

@@ -39,116 +39,63 @@ Configuration Properties
3939

4040
The following configuration properties are available:
4141

42-
============================================= ==============================================================================
43-
Property Name Description
44-
============================================= ==============================================================================
45-
``clp.archive-source`` The source of the CLP archive.
46-
``clp.metadata-expire-interval`` The time interval after which metadata entries are considered expired.
47-
``clp.metadata-refresh-interval`` The frequency at which metadata is refreshed from the source.
48-
``clp.polymorphic-type-enabled`` Enables or disables support for polymorphic types within CLP.
49-
``clp.metadata-source`` The source from which metadata is fetched.
50-
``clp.metadata-db-url`` The connection URL for the metadata database.
51-
``clp.metadata-db-name`` The name of the metadata database.
52-
``clp.metadata-db-user`` The database user with access to the metadata database.
53-
``clp.metadata-db-password`` The password for the metadata database user.
54-
``clp.metadata-table-prefix`` A prefix applied to table names in the metadata database.
55-
``clp.split-source`` The source of split information for query execution.
56-
============================================= ==============================================================================
42+
================================== ======================================================================== =========
43+
Property Name Description Default
44+
================================== ======================================================================== =========
45+
``clp.archive-source`` Specifies the source of the CLP archive. Supported values include ``local``
46+
``local`` (local storage) and ``s3`` (Amazon S3).
47+
``clp.metadata-expire-interval`` Defines how long metadata entries remain valid before being considered 600
48+
expired, in seconds.
49+
``clp.metadata-refresh-interval`` Specifies how frequently metadata is refreshed from the source, in 60
50+
seconds. Set this to a lower value for frequently changing datasets or
51+
to a higher value to reduce load.
52+
``clp.polymorphic-type-enabled`` Enables or disables support for polymorphic types in CLP, allowing the ``false``
53+
same field to have different types. This is useful for schema-less,
54+
semi-structured data where the same field may appear with different
55+
types.
56+
57+
When enabled, type annotations are added to conflicting field names to
58+
distinguish between types. For example, if ``id`` column appears as both
59+
an ``int`` and ``string`` types, the connector will create two columns
60+
named ``id_bigint`` and ``id_varchar``.
61+
62+
Supported type annotations include ``bigint``, ``varchar``, ``double``,
63+
``boolean``, and ``array(varchar)`` (See `Data Types`_ for details). For
64+
columns with only one type, the original column name is used.
65+
``clp.metadata-provider-type`` Specifies the metadata provider type. Currently, the only supported ``mysql``
66+
type is a MySQL database, which is also used by the CLP package to store
67+
metadata. Additional providers can be supported by implementing the
68+
``ClpMetadataProvider`` interface.
69+
``clp.metadata-db-url`` The JDBC URL used to connect to the metadata database. This property is
70+
required if ``clp.metadata-source`` is set to ``mysql``.
71+
``clp.metadata-db-name`` The name of the metadata database. This option is required if
72+
``clp.metadata-source`` is set to ``mysql`` and the database name is not
73+
specified in the URL.
74+
``clp.metadata-db-user`` The database user with access to the metadata database.This option is
75+
required if ``clp.metadata-source`` is set to ``mysql`` and the database
76+
name is not specified in the URL.
77+
``clp.metadata-db-password`` The password for the metadata database user. This option is required if
78+
``clp.metadata-source`` is set to ``mysql``.
79+
``clp.metadata-table-prefix`` A string prefix prepended to all metadata table names when querying the
80+
database. Useful for namespacing or avoiding collisions. This option is
81+
required if ``clp.metadata-source`` is set to ``mysql``.
82+
``clp.split-provider-type`` Specifies the split provider type. By default, it uses the same type as ``mysql``
83+
the metadata provider with the same connection parameters. Additional
84+
types can be supported by implementing the ``ClpSplitProvider``
85+
interface.
86+
================================== ======================================================================== =========
5787

58-
``clp.archive-source``
59-
^^^^^^^^^^^^^^^^^^^^^^
6088

61-
Specifies the source of the CLP archive. Supported values include ``local`` (local storage) and ``s3`` (Amazon S3).
62-
63-
This property is optional. The default is ``local``.
64-
65-
``clp.metadata-expire-interval``
66-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
67-
Defines how long metadata entries remain valid before being considered expired, in seconds.
68-
69-
This property is optional. The default is ``600``.
70-
71-
``clp.metadata-refresh-interval``
72-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
73-
Specifies how frequently metadata is refreshed from the source, in seconds. This ensures that metadata remains up to
74-
date.
75-
76-
Set this to a lower value for frequently changing datasets or to a higher value to reduce load.
77-
78-
This property is optional. The default is ``60``.
79-
80-
``clp.polymorphic-type-enabled``
81-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
82-
Enables or disables support for polymorphic types in CLP, allowing the same field to have different types.
83-
This is useful for schema-less, semi-structured data where the same field may appear with different types.
84-
When enabled, type annotations are added to conflicting field names to distinguish between types. For example, if ``id``
85-
column appears as both an ``int`` and ``string`` types, the connector will create two columns named ``id_bigint`` and
86-
``id_varchar``.
87-
88-
Supported type annotations include ``bigint``, ``varchar``, ``double``, ``boolean``, and
89-
``array(varchar)`` (See `Data Types`_ for details). For columns with only one type, the original column name is used.
90-
91-
This property is optional. The default is ``false``.
92-
93-
``clp.metadata-source``
94-
^^^^^^^^^^^^^^^^^^^^^^^
95-
Currently, the only supported source is a MySQL database, which is also used by the CLP package to store metadata.
96-
Additional sources can be supported by implementing the ``ClpMetadataProvider`` interface.
97-
98-
This property is optional. The default is ``mysql``.
99-
100-
``clp.metadata-db-url``
101-
^^^^^^^^^^^^^^^^^^^^^^^
102-
The JDBC URL used to connect to the metadata database.
103-
104-
This property is required if ``clp.metadata-source`` is set to ``mysql``.
105-
106-
``clp.metadata-db-name``
107-
^^^^^^^^^^^^^^^^^^^^^^^^
108-
109-
The name of the metadata database.
110-
111-
This option is required if ``clp.metadata-source`` is set to ``mysql`` and the database name is not specified in the URL.
112-
113-
``clp.metadata-db-user``
114-
^^^^^^^^^^^^^^^^^^^^^^^^
115-
116-
The username used to authenticate with the metadata database.
117-
118-
Ensure this user has read access to the relevant metadata tables.
119-
120-
This option is required if ``clp.metadata-source`` is set to ``mysql``.
121-
122-
``clp.metadata-db-password``
123-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
124-
125-
The password for the user specified in ``clp.metadata-db-user``.
126-
127-
This option is required if ``clp.metadata-source`` is set to ``mysql``.
128-
129-
``clp.metadata-table-prefix``
130-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
131-
132-
A string prefix prepended to all metadata table names when querying the database. Useful for namespacing or avoiding
133-
collisions.
134-
135-
This option is optional. The default is empty.
136-
137-
``clp.split-source``
138-
^^^^^^^^^^^^^^^^^^^^
139-
140-
Specifies the source of split information for tables. By default, it uses the same source as the metadata with the same
141-
connection parameters. Additional sources can be supported by implementing the ``ClpSplitProvider`` interface.
142-
143-
This property is optional. The default is ``mysql``.
14489

14590
Metadata and Split Providers
14691
----------------------------
147-
As mentioned earlier, the CLP connector relies on metadata and split providers to retrieve information from various
148-
sources. By default, it uses a MySQL database for both metadata and split storage. We recommend using the CLP package
149-
for log ingestion, which automatically populates the database with the required information. However, if you prefer to
150-
use a different source—or the same source with a custom implementation—you can provide your own implementations of
151-
the ``ClpMetadataProvider`` and ``ClpSplitProvider`` interfaces, and configure the connector accordingly.
92+
The CLP connector relies on metadata and split providers to retrieve information from various sources. By default, it
93+
uses a MySQL database for both metadata and split storage. We recommend using the CLP package for log ingestion, which
94+
automatically populates the database with the required information.
95+
96+
If you prefer to use a different source—or the same source with a custom implementation—you can provide your own
97+
implementations of the ``ClpMetadataProvider`` and ``ClpSplitProvider`` interfaces, and configure the connector
98+
accordingly.
15299

153100
Data Types
154101
----------
@@ -172,7 +119,7 @@ CLP Type Presto Type
172119
String Types
173120
^^^^^^^^^^^^
174121

175-
In CLP, we have three distinct string types: ``ClpString`` (strings with whitespace), ``VarString`` (strings without
122+
CLP uses three distinct string types: ``ClpString`` (strings with whitespace), ``VarString`` (strings without
176123
whitespace), and ``DateString`` (strings representing dates). Currently, all three are mapped to Presto's ``VARCHAR``
177124
type.
178125

@@ -181,13 +128,13 @@ Array Types
181128

182129
CLP supports two array types: ``UnstructuredArray`` and ``StructuredArray``. Unstructured arrays are stored as strings
183130
in CLP and elements can be any type. However, in Presto arrays are homogeneous, so the elements are converted to strings
184-
when read. ``StructuredArray`` type is not supported yet.
131+
when read. ``StructuredArray`` type is not supported in Presto.
185132

186133
Object Types
187134
^^^^^^^^^^^^
188135
CLP stores metadata using a global schema tree structure that captures all possible fields from various log structures.
189136
Internal nodes may represent objects containing nested fields as their children. In Presto, we map these internal object
190-
nodes specifically to the ``ROW`` data type, including all subfields as fields within the ``ROW``.
137+
nodes to the ``ROW`` data type, including all subfields as fields within the ``ROW``.
191138

192139
For instance, consider a table containing two distinct JSON log types:
193140

@@ -223,9 +170,9 @@ following ``ROW`` type:
223170
224171
ROW(ts BIGINT, status VARCHAR, thread_num BIGINT, backtrace VARCHAR)
225172
226-
Each JSON log maps to this unified ``ROW`` type, with absent fields represented as ``NULL``. Thus, the child nodes
227-
(``ts``, ``status``, ``thread_num``, ``backtrace``) become fields within the ``ROW``, clearly reflecting the nested and
228-
varying structures of the original JSON logs.
173+
Each JSON log maps to this unified ``ROW`` type, with absent fields represented as ``NULL``. The child nodes (``ts``,
174+
``status``, ``thread_num``, ``backtrace``) become fields within the ``ROW``, clearly reflecting the nested and varying
175+
structures of the original JSON logs.
229176

230177
SQL support
231178
-----------

0 commit comments

Comments
 (0)