pygwalker/data_parsers/base.py (34 changes: 24 additions & 10 deletions)

@@ -1,6 +1,7 @@
 from typing import Generic, Dict, List, Any, Optional
 from typing_extensions import Literal
 from functools import lru_cache
+from threading import Lock
 from datetime import datetime, date
 from datetime import timedelta
 import abc
@@ -132,22 +133,35 @@ def __init__(
         self.infer_string_to_date = infer_string_to_date
         self.infer_number_to_dimension = infer_number_to_dimension
         self.other_params = other_params
+        self._field_metas_cache = None
+        self._raw_fields_cache = None
+        self._cache_lock = Lock()

     @property
-    @lru_cache()
     def field_metas(self) -> List[Dict[str, str]]:
-        duckdb.register("pygwalker_mid_table", self._duckdb_df)
-        result = duckdb.query("SELECT * FROM pygwalker_mid_table LIMIT 1")
-        data = result.fetchone()
-        return get_data_meta_type(dict(zip(result.columns, data))) if data else []
+        cache = self._field_metas_cache
+        if cache is not None:
+            return cache
+        with self._cache_lock:
+            if self._field_metas_cache is None:
+                duckdb.register("pygwalker_mid_table", self._duckdb_df)
+                result = duckdb.query("SELECT * FROM pygwalker_mid_table LIMIT 1")
+                data = result.fetchone()
+                self._field_metas_cache = get_data_meta_type(dict(zip(result.columns, data))) if data else []
+        return self._field_metas_cache

     @property
-    @lru_cache()
     def raw_fields(self) -> List[Dict[str, str]]:
-        return [
-            self._infer_prop(col, self.field_specs)
-            for _, col in enumerate(self._example_df.columns)
-        ]
+        cache = self._raw_fields_cache
+        if cache is not None:
+            return cache
+        with self._cache_lock:
+            if self._raw_fields_cache is None:
+                self._raw_fields_cache = [
+                    self._infer_prop(col, self.field_specs)
+                    for _, col in enumerate(self._example_df.columns)
+                ]
+        return self._raw_fields_cache

     def _infer_prop(
         self, col: str, field_specs: List[FieldSpec] = None
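
What every hunk above implements is double-checked locking: an unlocked fast path once the value exists, then a re-check under the lock so the expensive body runs at most once even if several threads race on first access. A minimal standalone sketch of the idiom (illustrative names, not pygwalker code):

from threading import Lock
from typing import Dict, List, Optional

class LazyMetas:
    def __init__(self) -> None:
        self._metas_cache: Optional[List[Dict[str, str]]] = None
        self._cache_lock = Lock()

    def _compute_metas(self) -> List[Dict[str, str]]:
        # stand-in for the expensive work (e.g. a DuckDB query)
        return [{"fid": "a", "semanticType": "nominal"}]

    @property
    def metas(self) -> List[Dict[str, str]]:
        cache = self._metas_cache           # fast path: lock-free after first fill
        if cache is not None:
            return cache
        with self._cache_lock:
            if self._metas_cache is None:   # re-check: another thread may have won
                self._metas_cache = self._compute_metas()
        return self._metas_cache

Reading the attribute into a local first matters: a single attribute read is effectively atomic under the GIL, so the fast path never observes a half-written value. None is a safe sentinel here because the computed result is always a list, and even an empty list is not None.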
pygwalker/data_parsers/cloud_dataset_parser.py (45 changes: 29 additions & 16 deletions)

@@ -1,5 +1,5 @@
 from typing import Any, Dict, List, Optional
-from functools import lru_cache
+from threading import Lock
 from decimal import Decimal
 import logging
 import io
@@ -31,6 +31,9 @@ def __init__(
         self.other_params = other_params
         self._cloud_service = CloudService(other_params.get("kanaries_api_key", ""))
         self.example_pandas_df = self._get_example_pandas_df()
+        self._field_metas_cache = None
+        self._raw_fields_cache = None
+        self._cache_lock = Lock()

     def _get_example_pandas_df(self) -> pd.DataFrame:
         datas = self._get_all_datas(1000)
@@ -41,25 +44,35 @@ def _get_example_pandas_df(self) -> pd.DataFrame:
         return example_df

     @property
-    @lru_cache()
     def field_metas(self) -> List[Dict[str, str]]:
-        data = self._get_all_datas(1)
-        return get_data_meta_type(data[0]) if data else []
+        cache = self._field_metas_cache
+        if cache is not None:
+            return cache
+        with self._cache_lock:
+            if self._field_metas_cache is None:
+                data = self._get_all_datas(1)
+                self._field_metas_cache = get_data_meta_type(data[0]) if data else []
+        return self._field_metas_cache

     @property
-    @lru_cache()
     def raw_fields(self) -> List[Dict[str, str]]:
-        pandas_parser = PandasDataFrameDataParser(
-            self.example_pandas_df,
-            self.field_specs,
-            self.infer_string_to_date,
-            self.infer_number_to_dimension,
-            self.other_params
-        )
-        return [
-            {**field, "fid": field["name"]}
-            for field in pandas_parser.raw_fields
-        ]
+        cache = self._raw_fields_cache
+        if cache is not None:
+            return cache
+        with self._cache_lock:
+            if self._raw_fields_cache is None:
+                pandas_parser = PandasDataFrameDataParser(
+                    self.example_pandas_df,
+                    self.field_specs,
+                    self.infer_string_to_date,
+                    self.infer_number_to_dimension,
+                    self.other_params
+                )
+                self._raw_fields_cache = [
+                    {**field, "fid": field["name"]}
+                    for field in pandas_parser.raw_fields
+                ]
+        return self._raw_fields_cache

     def to_records(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
         if limit is None:
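
The same dozen lines now recur in four parsers, so one alternative the PR does not take would be to factor the idiom into a descriptor. A hypothetical locked_cached_property (the name is invented here; this is essentially what functools.cached_property did before Python 3.12):

from threading import Lock

class locked_cached_property:
    """Lazy, lock-guarded property that caches into the instance dict."""

    def __init__(self, func):
        self.func = func
        self.name = func.__name__
        self.lock = Lock()  # one lock per descriptor, shared by all instances

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        with self.lock:
            if self.name not in obj.__dict__:
                obj.__dict__[self.name] = self.func(obj)
        # non-data descriptor: once the instance dict holds the value,
        # later lookups bypass __get__ entirely, so the lock is only
        # ever taken around first access
        return obj.__dict__[self.name]

class Demo:
    @locked_cached_property
    def metas(self):
        return ["computed once"]

The trade-off is the shared lock: a slow first computation on one instance blocks first access on every other instance of the class. That cross-instance contention is why CPython 3.12 removed the lock from functools.cached_property, and it is a point in favor of the explicit per-instance locks this PR uses instead.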
pygwalker/data_parsers/database_parser.py (45 changes: 29 additions & 16 deletions)

@@ -1,5 +1,5 @@
 from typing import Any, Dict, List, Optional
-from functools import lru_cache
+from threading import Lock
 from decimal import Decimal
 import logging
 import json
@@ -158,6 +158,9 @@ def __init__(
         self.infer_string_to_date = infer_string_to_date
         self.infer_number_to_dimension = infer_number_to_dimension
         self.other_params = other_params
+        self._field_metas_cache = None
+        self._raw_fields_cache = None
+        self._cache_lock = Lock()

     def _get_example_pandas_df(self) -> pd.DataFrame:
         sql = self._format_sql(f"SELECT * FROM {self.placeholder_table_name} LIMIT 1000")
@@ -187,25 +190,35 @@ def placeholder_table_name(self) -> str:
         return "___pygwalker_temp_view_name___"

     @property
-    @lru_cache()
     def field_metas(self) -> List[Dict[str, str]]:
-        data = self._get_datas_by_sql(f"SELECT * FROM {self.placeholder_table_name} LIMIT 1")
-        return get_data_meta_type(data[0]) if data else []
+        cache = self._field_metas_cache
+        if cache is not None:
+            return cache
+        with self._cache_lock:
+            if self._field_metas_cache is None:
+                data = self._get_datas_by_sql(f"SELECT * FROM {self.placeholder_table_name} LIMIT 1")
+                self._field_metas_cache = get_data_meta_type(data[0]) if data else []
+        return self._field_metas_cache

     @property
-    @lru_cache()
     def raw_fields(self) -> List[Dict[str, str]]:
-        pandas_parser = PandasDataFrameDataParser(
-            self.example_pandas_df,
-            self.field_specs,
-            self.infer_string_to_date,
-            self.infer_number_to_dimension,
-            self.other_params
-        )
-        return [
-            {**field, "fid": field["name"]}
-            for field in pandas_parser.raw_fields
-        ]
+        cache = self._raw_fields_cache
+        if cache is not None:
+            return cache
+        with self._cache_lock:
+            if self._raw_fields_cache is None:
+                pandas_parser = PandasDataFrameDataParser(
+                    self.example_pandas_df,
+                    self.field_specs,
+                    self.infer_string_to_date,
+                    self.infer_number_to_dimension,
+                    self.other_params
+                )
+                self._raw_fields_cache = [
+                    {**field, "fid": field["name"]}
+                    for field in pandas_parser.raw_fields
+                ]
+        return self._raw_fields_cache

     def to_records(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
         if limit is None:
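
It is worth spelling out what the removed decorator cost. @lru_cache() keys its cache on the call arguments, and for a property getter the only argument is self, so the decorator's module-level cache held a strong reference to every parser instance that ever computed the property, keeping it (and its DataFrame or connection) alive indefinitely. A small demonstration of the leak (illustrative, not pygwalker code):

import gc
import weakref
from functools import lru_cache

class Parser:
    @property
    @lru_cache()
    def field_metas(self):
        return ["expensive result"]

p = Parser()
ref = weakref.ref(p)
_ = p.field_metas   # fills the cache, keyed on `p` itself
del p
gc.collect()
print(ref())        # still alive: the lru_cache entry pins the instance

With per-instance _field_metas_cache attributes, the cached value shares the parser's lifetime, so dropping the parser frees the result as well.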
pygwalker/data_parsers/spark_parser.py (39 changes: 26 additions & 13 deletions)

@@ -1,5 +1,5 @@
 from typing import Any, Dict, List, Optional
-from functools import lru_cache
+from threading import Lock
 import logging
 import io

@@ -39,24 +39,37 @@ def __init__(
         self.infer_string_to_date = infer_string_to_date
         self.infer_number_to_dimension = infer_number_to_dimension
         self.other_params = other_params
+        self._field_metas_cache = None
+        self._raw_fields_cache = None
+        self._cache_lock = Lock()

     @property
-    @lru_cache()
     def raw_fields(self) -> List[Dict[str, str]]:
-        pandas_parser = PandasDataFrameDataParser(
-            self.example_pandas_df,
-            self.field_specs,
-            self.infer_string_to_date,
-            self.infer_number_to_dimension,
-            self.other_params
-        )
-        return pandas_parser.raw_fields
+        cache = self._raw_fields_cache
+        if cache is not None:
+            return cache
+        with self._cache_lock:
+            if self._raw_fields_cache is None:
+                pandas_parser = PandasDataFrameDataParser(
+                    self.example_pandas_df,
+                    self.field_specs,
+                    self.infer_string_to_date,
+                    self.infer_number_to_dimension,
+                    self.other_params
+                )
+                self._raw_fields_cache = pandas_parser.raw_fields
+        return self._raw_fields_cache

     @property
-    @lru_cache()
     def field_metas(self) -> List[Dict[str, str]]:
-        data = self.get_datas_by_sql("SELECT * FROM pygwalker_mid_table LIMIT 1")
-        return get_data_meta_type(data[0]) if data else []
+        cache = self._field_metas_cache
+        if cache is not None:
+            return cache
+        with self._cache_lock:
+            if self._field_metas_cache is None:
+                data = self.get_datas_by_sql("SELECT * FROM pygwalker_mid_table LIMIT 1")
+                self._field_metas_cache = get_data_meta_type(data[0]) if data else []
+        return self._field_metas_cache

     def to_records(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
         df = self.df.limit(limit) if limit is not None else self.df
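
A quick smoke test, shaped like the patched properties, confirms the body runs exactly once under contention (a sketch; compute_count exists only for the test):

from threading import Lock, Thread

class OnceParser:
    def __init__(self):
        self._field_metas_cache = None
        self._cache_lock = Lock()
        self.compute_count = 0

    @property
    def field_metas(self):
        cache = self._field_metas_cache
        if cache is not None:
            return cache
        with self._cache_lock:
            if self._field_metas_cache is None:
                self.compute_count += 1            # counts executions of the body
                self._field_metas_cache = [{"fid": "x"}]
        return self._field_metas_cache

parser = OnceParser()
threads = [Thread(target=lambda: parser.field_metas) for _ in range(32)]
for t in threads:
    t.start()
for t in threads:
    t.join()
assert parser.compute_count == 1   # lock + re-check: computed once, served 32 times

Without the second check inside the with block, two threads that both missed the fast path would compute the value twice; harmless for a pure query, but not for work with side effects such as registering the DuckDB view in base.py.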