2222import com .google .api .services .bigquery .model .JobReference ;
2323import com .google .api .services .bigquery .model .Table ;
2424import com .google .api .services .bigquery .model .TableReference ;
25+ import com .google .cloud .bigquery .RangePartitioning ;
2526import com .google .cloud .bigquery .StandardTableDefinition ;
2627import com .google .cloud .bigquery .TableDefinition .Type ;
2728import com .google .cloud .bigquery .TimePartitioning ;
5253import org .apache .hadoop .mapreduce .RecordReader ;
5354import org .apache .hadoop .mapreduce .lib .input .FileSplit ;
5455import org .apache .hadoop .util .Progressable ;
56+ import org .slf4j .Logger ;
57+ import org .slf4j .LoggerFactory ;
5558
5659import java .io .IOException ;
5760import java .security .GeneralSecurityException ;
6871 */
6972public class PartitionedBigQueryInputFormat extends AbstractBigQueryInputFormat <LongWritable , GenericData .Record > {
7073
74+ private static final Logger LOG = LoggerFactory .getLogger (PartitionedBigQueryInputFormat .class );
7175 private InputFormat <LongWritable , GenericData .Record > delegateInputFormat =
7276 new AvroBigQueryInputFormatWithScopes ();
7377
@@ -132,19 +136,27 @@ private void processQuery(JobContext context) throws IOException, InterruptedExc
132136 String partitionFromDate = configuration .get (BigQueryConstants .CONFIG_PARTITION_FROM_DATE , null );
133137 String partitionToDate = configuration .get (BigQueryConstants .CONFIG_PARTITION_TO_DATE , null );
134138 String filter = configuration .get (BigQueryConstants .CONFIG_FILTER , null );
139+ String limit = configuration .get (BigQueryConstants .CONFIG_LIMIT , null );
140+ String orderBy = configuration .get (BigQueryConstants .CONFIG_ORDER_BY , null );
135141 Integer readTimeout = configuration .getInt (BigQueryConstants .CONFIG_BQ_HTTP_READ_TIMEOUT ,
136142 GCPUtils .BQ_DEFAULT_READ_TIMEOUT_SECONDS );
137143
138144 com .google .cloud .bigquery .Table bigQueryTable = BigQueryUtil .getBigQueryTable (
139- datasetProjectId , datasetId , tableName , serviceAccount , isServiceAccountFilePath , null , readTimeout );
145+ datasetProjectId , datasetId , tableName , serviceAccount , isServiceAccountFilePath , null ,
146+ readTimeout );
140147 Type type = Objects .requireNonNull (bigQueryTable ).getDefinition ().getType ();
148+ Boolean isPartitionFilterRequired = bigQueryTable .getRequirePartitionFilter ();
149+ StandardTableDefinition tableDefinition = Objects .requireNonNull (bigQueryTable ).getDefinition ();
141150
142151 String query ;
143152 if (type == Type .VIEW || type == Type .MATERIALIZED_VIEW || type == Type .EXTERNAL ) {
144- query = generateQueryForMaterializingView (datasetProjectId , datasetId , tableName , filter );
153+ query = generateQueryForMaterializingView (datasetProjectId , datasetId , tableName , filter ,
154+ limit , orderBy );
145155 } else {
146- query = generateQuery (partitionFromDate , partitionToDate , filter , projectId , datasetProjectId , datasetId ,
147- tableName , serviceAccount , isServiceAccountFilePath );
156+ query = generateQuery (partitionFromDate , partitionToDate , filter , datasetProjectId ,
157+ datasetId ,
158+ tableName , limit , orderBy ,
159+ isPartitionFilterRequired , tableDefinition );
148160 }
149161
150162 if (query != null ) {
@@ -166,30 +178,41 @@ private void processQuery(JobContext context) throws IOException, InterruptedExc
166178 }
167179
168180 @ VisibleForTesting
169- String generateQuery (String partitionFromDate , String partitionToDate , String filter , String project ,
170- String datasetProject , String dataset , String table , @ Nullable String serviceAccount ,
171- @ Nullable Boolean isServiceAccountFilePath ) {
172- if (partitionFromDate == null && partitionToDate == null && filter == null ) {
181+ String generateQuery (String partitionFromDate , String partitionToDate , String filter ,
182+ String datasetProject , String dataset , String table , String limit , String orderBy ,
183+ Boolean isPartitionFilterRequired , StandardTableDefinition tableDefinition ) {
184+
185+ if (Strings .isNullOrEmpty (filter ) && Strings .isNullOrEmpty (orderBy ) && Strings .isNullOrEmpty (
186+ limit )
187+ && Strings .isNullOrEmpty (partitionFromDate ) && Strings .isNullOrEmpty (partitionToDate )) {
173188 return null ;
174189 }
175- String queryTemplate = "select * from `%s` where %s" ;
176- com .google .cloud .bigquery .Table sourceTable =
177- BigQueryUtil .getBigQueryTable (datasetProject , dataset , table , serviceAccount , isServiceAccountFilePath , null ,
178- null );
179- StandardTableDefinition tableDefinition = Objects .requireNonNull (sourceTable ).getDefinition ();
190+
191+ RangePartitioning rangePartitioning = tableDefinition .getRangePartitioning ();
180192 TimePartitioning timePartitioning = tableDefinition .getTimePartitioning ();
181- if (timePartitioning == null && filter == null ) {
182- return null ;
183- }
184193 StringBuilder condition = new StringBuilder ();
194+ String partitionCondition = null ;
185195
186196 if (timePartitioning != null ) {
187- String timePartitionCondition = BigQueryUtil .generateTimePartitionCondition (tableDefinition , partitionFromDate ,
188- partitionToDate );
189- condition .append (timePartitionCondition );
197+ if (partitionFromDate == null && partitionToDate == null
198+ && Objects .equals (isPartitionFilterRequired , Boolean .TRUE )) {
199+ partitionCondition = BigQueryUtil .generateDefaultTimePartitionCondition (tableDefinition );
200+ } else if (partitionFromDate != null || partitionToDate != null ) {
201+ partitionCondition =
202+ BigQueryUtil .generateTimePartitionCondition (tableDefinition , partitionFromDate ,
203+ partitionToDate );
204+ }
205+ } else if (rangePartitioning != null && Objects .equals (isPartitionFilterRequired ,
206+ Boolean .TRUE )) {
207+ partitionCondition = BigQueryUtil .generateDefaultRangePartitionCondition (
208+ tableDefinition );
190209 }
191210
192- if (filter != null ) {
211+ if (!Strings .isNullOrEmpty (partitionCondition )) {
212+ condition .append ("(" ).append (partitionCondition ).append (")" );
213+ }
214+
215+ if (!Strings .isNullOrEmpty (filter )) {
193216 if (condition .length () == 0 ) {
194217 condition .append (filter );
195218 } else {
@@ -198,20 +221,42 @@ String generateQuery(String partitionFromDate, String partitionToDate, String fi
198221 }
199222
200223 String tableName = datasetProject + "." + dataset + "." + table ;
201- return String .format (queryTemplate , tableName , condition .toString ());
224+ StringBuilder query = new StringBuilder ("select * from " ).append (tableName );
225+
226+ if (condition .length () > 0 ) {
227+ query .append (" where " ).append (condition );
228+ }
229+
230+ if (!Strings .isNullOrEmpty (orderBy )) {
231+ query .append (" order by " ).append (orderBy );
232+ }
233+
234+ if (!Strings .isNullOrEmpty (limit )) {
235+ query .append (" limit " ).append (limit );
236+ }
237+
238+ LOG .debug ("Generated BigQuery query for job: {}" , query );
239+ return query .toString ();
202240 }
203241
204242 @ VisibleForTesting
205- String generateQueryForMaterializingView (String datasetProject , String dataset , String table , String filter ) {
206- String queryTemplate = "select * from `%s`%s" ;
207- StringBuilder condition = new StringBuilder ( );
208-
243+ String generateQueryForMaterializingView (String datasetProject , String dataset , String table ,
244+ String filter , String limit , String orderBy ) {
245+ String tableName = String . format ( "`%s.%s.%s`" , datasetProject , dataset , table );
246+ StringBuilder query = new StringBuilder ( "select * from " ). append ( tableName );
209247 if (!Strings .isNullOrEmpty (filter )) {
210- condition .append (String . format ( " where %s" , filter ) );
248+ query .append (" where " ). append ( filter );
211249 }
212250
213- String tableName = datasetProject + "." + dataset + "." + table ;
214- return String .format (queryTemplate , tableName , condition .toString ());
251+ if (!Strings .isNullOrEmpty (orderBy )) {
252+ query .append (" order by " ).append (orderBy );
253+ }
254+
255+ if (!Strings .isNullOrEmpty (limit )) {
256+ query .append (" limit " ).append (limit );
257+ }
258+
259+ return query .toString ();
215260 }
216261
217262 /**
0 commit comments