21
21
import inspect
22
22
import sys
23
23
import typing
24
- from typing import (
25
- Any ,
26
- Callable ,
27
- Dict ,
28
- IO ,
29
- Iterable ,
30
- List ,
31
- Literal ,
32
- MutableSequence ,
33
- Optional ,
34
- Sequence ,
35
- Tuple ,
36
- Union ,
37
- )
24
+ from typing import Any , Iterable , List , Literal , Optional , Sequence , Tuple , Union
38
25
39
26
import bigframes_vendored .constants as constants
40
27
import bigframes_vendored .pandas .core .reshape .concat as vendored_pandas_concat
41
28
import bigframes_vendored .pandas .core .reshape .encoding as vendored_pandas_encoding
42
29
import bigframes_vendored .pandas .core .reshape .merge as vendored_pandas_merge
43
30
import bigframes_vendored .pandas .core .reshape .tile as vendored_pandas_tile
44
31
import bigframes_vendored .pandas .core .tools .datetimes as vendored_pandas_datetimes
45
- import bigframes_vendored .pandas .io .gbq as vendored_pandas_gbq
46
- from google .cloud import bigquery
47
- import numpy
48
32
import pandas
49
- from pandas ._typing import (
50
- CompressionOptions ,
51
- FilePath ,
52
- ReadPickleBuffer ,
53
- StorageOptions ,
54
- )
55
33
56
34
import bigframes ._config as config
57
35
import bigframes .core .blocks
65
43
import bigframes .enums
66
44
import bigframes .functions ._utils as functions_utils
67
45
import bigframes .operations as ops
46
+ from bigframes .pandas .io .api import (
47
+ read_csv ,
48
+ read_gbq ,
49
+ read_gbq_function ,
50
+ read_gbq_model ,
51
+ read_gbq_query ,
52
+ read_gbq_table ,
53
+ read_json ,
54
+ read_pandas ,
55
+ read_parquet ,
56
+ read_pickle ,
57
+ )
68
58
import bigframes .series
69
59
import bigframes .session
70
60
import bigframes .session ._io .bigquery
@@ -373,286 +363,6 @@ def merge(
373
363
merge .__doc__ = vendored_pandas_merge .merge .__doc__
374
364
375
365
376
- def _set_default_session_location_if_possible (query ):
377
- # Set the location as per the query if this is the first query the user is
378
- # running and:
379
- # (1) Default session has not started yet, and
380
- # (2) Location is not set yet, and
381
- # (3) Use of regional endpoints is not set.
382
- # If query is a table name, then it would be the location of the table.
383
- # If query is a SQL with a table, then it would be table's location.
384
- # If query is a SQL with no table, then it would be the BQ default location.
385
- if (
386
- options .bigquery ._session_started
387
- or options .bigquery .location
388
- or options .bigquery .use_regional_endpoints
389
- ):
390
- return
391
-
392
- clients_provider = bigframes .session .clients .ClientsProvider (
393
- project = options .bigquery .project ,
394
- location = options .bigquery .location ,
395
- use_regional_endpoints = options .bigquery .use_regional_endpoints ,
396
- credentials = options .bigquery .credentials ,
397
- application_name = options .bigquery .application_name ,
398
- bq_kms_key_name = options .bigquery .kms_key_name ,
399
- )
400
-
401
- bqclient = clients_provider .bqclient
402
-
403
- if bigframes .session ._io .bigquery .is_query (query ):
404
- # Intentionally run outside of the session so that we can detect the
405
- # location before creating the session. Since it's a dry_run, labels
406
- # aren't necessary.
407
- job = bqclient .query (query , bigquery .QueryJobConfig (dry_run = True ))
408
- options .bigquery .location = job .location
409
- else :
410
- table = bqclient .get_table (query )
411
- options .bigquery .location = table .location
412
-
413
-
414
- # Note: the following methods are duplicated from Session. This duplication
415
- # enables the following:
416
- #
417
- # 1. Static type checking knows the argument and return types, which is
418
- # difficult to do with decorators. Aside: When we require Python 3.10, we
419
- # can use Concatenate for generic typing in decorators. See:
420
- # https://stackoverflow.com/a/68290080/101923
421
- # 2. docstrings get processed by static processing tools, such as VS Code's
422
- # autocomplete.
423
- # 3. Positional arguments function as expected. If we were to pull in the
424
- # methods directly from Session, a Session object would need to be the first
425
- # argument, even if we allow a default value.
426
- # 4. Allows to set BigQuery options for the BigFrames session based on the
427
- # method and its arguments.
428
-
429
-
430
- def read_csv (
431
- filepath_or_buffer : str | IO ["bytes" ],
432
- * ,
433
- sep : Optional [str ] = "," ,
434
- header : Optional [int ] = 0 ,
435
- names : Optional [
436
- Union [MutableSequence [Any ], numpy .ndarray [Any , Any ], Tuple [Any , ...], range ]
437
- ] = None ,
438
- index_col : Optional [
439
- Union [
440
- int ,
441
- str ,
442
- Sequence [Union [str , int ]],
443
- bigframes .enums .DefaultIndexKind ,
444
- Literal [False ],
445
- ]
446
- ] = None ,
447
- usecols : Optional [
448
- Union [
449
- MutableSequence [str ],
450
- Tuple [str , ...],
451
- Sequence [int ],
452
- pandas .Series ,
453
- pandas .Index ,
454
- numpy .ndarray [Any , Any ],
455
- Callable [[Any ], bool ],
456
- ]
457
- ] = None ,
458
- dtype : Optional [Dict ] = None ,
459
- engine : Optional [
460
- Literal ["c" , "python" , "pyarrow" , "python-fwf" , "bigquery" ]
461
- ] = None ,
462
- encoding : Optional [str ] = None ,
463
- ** kwargs ,
464
- ) -> bigframes .dataframe .DataFrame :
465
- return global_session .with_default_session (
466
- bigframes .session .Session .read_csv ,
467
- filepath_or_buffer = filepath_or_buffer ,
468
- sep = sep ,
469
- header = header ,
470
- names = names ,
471
- index_col = index_col ,
472
- usecols = usecols ,
473
- dtype = dtype ,
474
- engine = engine ,
475
- encoding = encoding ,
476
- ** kwargs ,
477
- )
478
-
479
-
480
- read_csv .__doc__ = inspect .getdoc (bigframes .session .Session .read_csv )
481
-
482
-
483
- def read_json (
484
- path_or_buf : str | IO ["bytes" ],
485
- * ,
486
- orient : Literal [
487
- "split" , "records" , "index" , "columns" , "values" , "table"
488
- ] = "columns" ,
489
- dtype : Optional [Dict ] = None ,
490
- encoding : Optional [str ] = None ,
491
- lines : bool = False ,
492
- engine : Literal ["ujson" , "pyarrow" , "bigquery" ] = "ujson" ,
493
- ** kwargs ,
494
- ) -> bigframes .dataframe .DataFrame :
495
- return global_session .with_default_session (
496
- bigframes .session .Session .read_json ,
497
- path_or_buf = path_or_buf ,
498
- orient = orient ,
499
- dtype = dtype ,
500
- encoding = encoding ,
501
- lines = lines ,
502
- engine = engine ,
503
- ** kwargs ,
504
- )
505
-
506
-
507
- read_json .__doc__ = inspect .getdoc (bigframes .session .Session .read_json )
508
-
509
-
510
- def read_gbq (
511
- query_or_table : str ,
512
- * ,
513
- index_col : Iterable [str ] | str | bigframes .enums .DefaultIndexKind = (),
514
- columns : Iterable [str ] = (),
515
- configuration : Optional [Dict ] = None ,
516
- max_results : Optional [int ] = None ,
517
- filters : vendored_pandas_gbq .FiltersType = (),
518
- use_cache : Optional [bool ] = None ,
519
- col_order : Iterable [str ] = (),
520
- ) -> bigframes .dataframe .DataFrame :
521
- _set_default_session_location_if_possible (query_or_table )
522
- return global_session .with_default_session (
523
- bigframes .session .Session .read_gbq ,
524
- query_or_table ,
525
- index_col = index_col ,
526
- columns = columns ,
527
- configuration = configuration ,
528
- max_results = max_results ,
529
- filters = filters ,
530
- use_cache = use_cache ,
531
- col_order = col_order ,
532
- )
533
-
534
-
535
- read_gbq .__doc__ = inspect .getdoc (bigframes .session .Session .read_gbq )
536
-
537
-
538
- def read_gbq_model (model_name : str ):
539
- return global_session .with_default_session (
540
- bigframes .session .Session .read_gbq_model ,
541
- model_name ,
542
- )
543
-
544
-
545
- read_gbq_model .__doc__ = inspect .getdoc (bigframes .session .Session .read_gbq_model )
546
-
547
-
548
- def read_gbq_query (
549
- query : str ,
550
- * ,
551
- index_col : Iterable [str ] | str | bigframes .enums .DefaultIndexKind = (),
552
- columns : Iterable [str ] = (),
553
- configuration : Optional [Dict ] = None ,
554
- max_results : Optional [int ] = None ,
555
- use_cache : Optional [bool ] = None ,
556
- col_order : Iterable [str ] = (),
557
- filters : vendored_pandas_gbq .FiltersType = (),
558
- ) -> bigframes .dataframe .DataFrame :
559
- _set_default_session_location_if_possible (query )
560
- return global_session .with_default_session (
561
- bigframes .session .Session .read_gbq_query ,
562
- query ,
563
- index_col = index_col ,
564
- columns = columns ,
565
- configuration = configuration ,
566
- max_results = max_results ,
567
- use_cache = use_cache ,
568
- col_order = col_order ,
569
- filters = filters ,
570
- )
571
-
572
-
573
- read_gbq_query .__doc__ = inspect .getdoc (bigframes .session .Session .read_gbq_query )
574
-
575
-
576
- def read_gbq_table (
577
- query : str ,
578
- * ,
579
- index_col : Iterable [str ] | str | bigframes .enums .DefaultIndexKind = (),
580
- columns : Iterable [str ] = (),
581
- max_results : Optional [int ] = None ,
582
- filters : vendored_pandas_gbq .FiltersType = (),
583
- use_cache : bool = True ,
584
- col_order : Iterable [str ] = (),
585
- ) -> bigframes .dataframe .DataFrame :
586
- _set_default_session_location_if_possible (query )
587
- return global_session .with_default_session (
588
- bigframes .session .Session .read_gbq_table ,
589
- query ,
590
- index_col = index_col ,
591
- columns = columns ,
592
- max_results = max_results ,
593
- filters = filters ,
594
- use_cache = use_cache ,
595
- col_order = col_order ,
596
- )
597
-
598
-
599
- read_gbq_table .__doc__ = inspect .getdoc (bigframes .session .Session .read_gbq_table )
600
-
601
-
602
- @typing .overload
603
- def read_pandas (pandas_dataframe : pandas .DataFrame ) -> bigframes .dataframe .DataFrame :
604
- ...
605
-
606
-
607
- @typing .overload
608
- def read_pandas (pandas_dataframe : pandas .Series ) -> bigframes .series .Series :
609
- ...
610
-
611
-
612
- @typing .overload
613
- def read_pandas (pandas_dataframe : pandas .Index ) -> bigframes .core .indexes .Index :
614
- ...
615
-
616
-
617
- def read_pandas (pandas_dataframe : Union [pandas .DataFrame , pandas .Series , pandas .Index ]):
618
- return global_session .with_default_session (
619
- bigframes .session .Session .read_pandas ,
620
- pandas_dataframe ,
621
- )
622
-
623
-
624
- read_pandas .__doc__ = inspect .getdoc (bigframes .session .Session .read_pandas )
625
-
626
-
627
- def read_pickle (
628
- filepath_or_buffer : FilePath | ReadPickleBuffer ,
629
- compression : CompressionOptions = "infer" ,
630
- storage_options : StorageOptions = None ,
631
- ):
632
- return global_session .with_default_session (
633
- bigframes .session .Session .read_pickle ,
634
- filepath_or_buffer = filepath_or_buffer ,
635
- compression = compression ,
636
- storage_options = storage_options ,
637
- )
638
-
639
-
640
- read_pickle .__doc__ = inspect .getdoc (bigframes .session .Session .read_pickle )
641
-
642
-
643
- def read_parquet (
644
- path : str | IO ["bytes" ], * , engine : str = "auto"
645
- ) -> bigframes .dataframe .DataFrame :
646
- return global_session .with_default_session (
647
- bigframes .session .Session .read_parquet ,
648
- path ,
649
- engine = engine ,
650
- )
651
-
652
-
653
- read_parquet .__doc__ = inspect .getdoc (bigframes .session .Session .read_parquet )
654
-
655
-
656
366
def remote_function (
657
367
input_types : Union [None , type , Sequence [type ]] = None ,
658
368
output_type : Optional [type ] = None ,
@@ -697,17 +407,6 @@ def remote_function(
697
407
remote_function .__doc__ = inspect .getdoc (bigframes .session .Session .remote_function )
698
408
699
409
700
- def read_gbq_function (function_name : str , is_row_processor : bool = False ):
701
- return global_session .with_default_session (
702
- bigframes .session .Session .read_gbq_function ,
703
- function_name = function_name ,
704
- is_row_processor = is_row_processor ,
705
- )
706
-
707
-
708
- read_gbq_function .__doc__ = inspect .getdoc (bigframes .session .Session .read_gbq_function )
709
-
710
-
711
410
@typing .overload
712
411
def to_datetime (
713
412
arg : Union [
@@ -893,15 +592,19 @@ def reset_session():
893
592
pass
894
593
895
594
# Use __all__ to let type checkers know what is part of the public API.
896
- __all___ = [
595
+ __all__ = [
897
596
# Functions
898
597
"concat" ,
899
598
"merge" ,
900
599
"read_csv" ,
901
600
"read_gbq" ,
902
601
"read_gbq_function" ,
903
602
"read_gbq_model" ,
603
+ "read_gbq_query" ,
604
+ "read_gbq_table" ,
605
+ "read_json" ,
904
606
"read_pandas" ,
607
+ "read_parquet" ,
905
608
"read_pickle" ,
906
609
"remote_function" ,
907
610
"to_datetime" ,
@@ -911,7 +614,7 @@ def reset_session():
911
614
"Float64Dtype" ,
912
615
"Int64Dtype" ,
913
616
"StringDtype" ,
914
- "ArrowDtype"
617
+ "ArrowDtype" ,
915
618
# Class aliases
916
619
"DataFrame" ,
917
620
"Index" ,
0 commit comments