Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions configurations/clients/sample/tools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ tools:
fake_call:
tool_call_id: *available_datasets_call_id
version: full
include_indicator_count: true

- type: dataset_structure
<<: *shared_settings
Expand Down
12 changes: 11 additions & 1 deletion statgpt/app/chains/datasets_meta/available_datasets_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,19 @@ async def _arun(self, inputs: dict) -> tuple[str, ToolArtifact]:
versioned_datasets = await data_service.list_available_datasets(auth_context)
datasets = [ds.data for ds in versioned_datasets]

indicator_counts: dict[str, int] | None = None
if self._tool_config.details.include_indicator_count:
indicator_counts = await data_service.get_indicator_counts(
auth_context, versioned_datasets
)

formatter = DatasetsListFormatter(self._dataset_formatter_config, auth_context=auth_context)
response = await formatter.format(
datasets, sort_by_name=True, add_stats=True, group_by_provider=True
datasets,
sort_by_name=True,
add_stats=True,
group_by_provider=True,
indicator_counts=indicator_counts,
)

target = ChainParameters.get_target(inputs)
Expand Down
15 changes: 15 additions & 0 deletions statgpt/app/services/chat_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,21 @@ async def _load_datasets(self, auth_context: AuthContext) -> list[VersionedDataS
async def list_available_datasets(self, auth_context: AuthContext) -> list[VersionedDataSet]:
    """Return the versioned datasets that `_load_datasets` yields for `auth_context`."""
    loaded = await self._load_datasets(auth_context)
    return loaded

async def get_indicator_counts(
    self, auth_context: AuthContext, versioned_datasets: list[VersionedDataSet]
) -> dict[str, int]:
    """Map each dataset `entity_id` to its indicator count.

    The counts are whatever the indicators vector store reports from
    `get_size_per_version` for the `version_data_id` values of the given
    datasets. Versions the store does not report are absent from the result.

    Args:
        auth_context: Passed to `_get_indicators_vector_store` to obtain the store.
        versioned_datasets: Datasets whose versions should be counted.

    Returns:
        A dict keyed by `entity_id`, valued by the count reported for that
        dataset's version.
    """
    store = await self._get_indicators_vector_store(auth_context)

    # Remember which dataset each version belongs to so the per-version
    # counts can be re-keyed by entity id afterwards.
    entity_by_version = {}
    for item in versioned_datasets:
        version_id = item.version.version_data_id
        entity_by_version[version_id] = item.data.entity_id

    sizes = await store.get_size_per_version(set(entity_by_version))

    counts: dict[str, int] = {}
    for version_id, size in sizes.items():
        # Ignore any version in the store's answer that was not requested.
        if version_id not in entity_by_version:
            continue
        counts[entity_by_version[version_id]] = size
    return counts

async def get_dataset_hierarchy(self, auth_context: AuthContext) -> DatasetHierarchy | None:
"""Get first available dataset hierarchy from the channel data sources."""

Expand Down
2 changes: 1 addition & 1 deletion statgpt/app/utils/formatters/dataset_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,5 @@ async def _get_dataset_update_at(self, dataset: DataSet) -> str:
return last_updated

@abstractmethod
async def format(self, dataset: DataSet) -> str:
async def format(self, dataset: DataSet, indicator_count: int | None = None) -> str:
pass
4 changes: 2 additions & 2 deletions statgpt/app/utils/formatters/dataset_detailed.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ def _append_attributes(self, dataset: DataSet, result: list[str]) -> None:
f'{attribute_details_tabs}- {self._("Description")}: {attr.description}'
)

async def format(self, dataset: DataSet) -> str:
async def format(self, dataset: DataSet, indicator_count: int | None = None) -> str:
result: list[str] = []
await self._append_basic_info(dataset, result)
await self._append_basic_info(dataset, result, indicator_count=indicator_count)
self._append_dimensions(dataset, result)
self._append_attributes(dataset, result)
return "\n".join(result)
11 changes: 8 additions & 3 deletions statgpt/app/utils/formatters/dataset_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

class SimpleDatasetFormatter(BaseDatasetFormatter):

async def _append_basic_info(self, dataset: DataSet, result: list[str]) -> None:
async def _append_basic_info(
self, dataset: DataSet, result: list[str], indicator_count: int | None = None
) -> None:
if self.config.include_name:
name_str = f'**{dataset.name}**' if self.config.highlight_name_in_bold else dataset.name
if self.config.official_dataset_label and dataset.config.is_official:
Expand Down Expand Up @@ -50,7 +52,10 @@ async def _append_basic_info(self, dataset: DataSet, result: list[str]) -> None:
).format(citation)
result.append(formatted_citation)

async def format(self, dataset: DataSet) -> str:
if indicator_count is not None:
result.append(f'{item_tabs}* {self._("Number of indicators")}: {indicator_count}')

async def format(self, dataset: DataSet, indicator_count: int | None = None) -> str:
result: list[str] = []
await self._append_basic_info(dataset, result)
await self._append_basic_info(dataset, result, indicator_count=indicator_count)
return "\n".join(result)
8 changes: 7 additions & 1 deletion statgpt/app/utils/formatters/datasets_list_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ async def format(
sort_by_name: bool = False,
add_stats: bool = False,
group_by_provider: bool = False,
indicator_counts: dict[str, int] | None = None,
) -> str:
if sort_by_id and sort_by_name:
raise ValueError(self._("Cannot sort by both id and name."))
Expand All @@ -50,7 +51,8 @@ async def format(

# Format each dataset
for dataset in iterable:
entry = await self._formatter.format(dataset)
count = indicator_counts.get(dataset.entity_id) if indicator_counts else None
entry = await self._formatter.format(dataset, indicator_count=count)
provider = dataset.config.citation.provider if dataset.config.citation else None
dataset_entries[provider].append(entry)

Expand Down Expand Up @@ -80,6 +82,10 @@ async def format(
# Count unique providers
providers = [p for p in dataset_entries.keys() if p is not None]
stats_header += f'\n{self._("Total providers")}: {len(providers)}'
if indicator_counts is not None:
stats_header += (
f'\n{self._("Total number of indicators")}: {sum(indicator_counts.values())}'
)
result = f'{stats_header}\n\n{datasets_list}'
else:
result = datasets_list
Expand Down
38 changes: 23 additions & 15 deletions statgpt/app/utils/formatters/locales/dataset.pot
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-05 17:19+0200\n"
"POT-Creation-Date: 2026-03-06 14:48+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
Expand All @@ -17,23 +17,27 @@ msgstr ""
"Content-Type: text/plain; charset=CHARSET\n"
"Content-Transfer-Encoding: 8bit\n"

#: dataset_simple.py:16
#: dataset_simple.py:18
msgid "[Official]"
msgstr ""

#: dataset_simple.py:28 dataset_query.py:125 dataset_query.py:247
#: dataset_simple.py:30 dataset_query.py:125 dataset_query.py:247
#: dataset_availablity_query.py:105
msgid "ID"
msgstr ""

#: dataset_simple.py:32
#: dataset_simple.py:34
msgid "Internal ID"
msgstr ""

#: dataset_simple.py:41 dataset_detailed.py:59 dataset_detailed.py:93
#: dataset_simple.py:43 dataset_detailed.py:59 dataset_detailed.py:93
msgid "Description"
msgstr ""

#: dataset_simple.py:56
msgid "Number of indicators"
msgstr ""

#: dataset_detailed.py:21 dataset_detailed.py:31
msgid "Total"
msgstr ""
Expand Down Expand Up @@ -63,40 +67,44 @@ msgstr ""
msgid "Attributes"
msgstr ""

#: datasets_list_formatter.py:39
#: datasets_list_formatter.py:40
msgid "Cannot sort by both id and name."
msgstr ""

#: datasets_list_formatter.py:62 datasets_list_formatter.py:64 citation.py:28
#: datasets_list_formatter.py:64 datasets_list_formatter.py:66 citation.py:28
msgid "Provider"
msgstr ""

#: datasets_list_formatter.py:64 datasets_list_formatter.py:110
#: datasets_list_formatter.py:66 datasets_list_formatter.py:116
msgid "Unknown"
msgstr ""

#: datasets_list_formatter.py:67
#: datasets_list_formatter.py:69
msgid "Total datasets from this provider"
msgstr ""

#: datasets_list_formatter.py:78 datasets_list_formatter.py:100
#: datasets_list_formatter.py:105
#: datasets_list_formatter.py:80 datasets_list_formatter.py:106
#: datasets_list_formatter.py:111
msgid "Total datasets"
msgstr ""

#: datasets_list_formatter.py:82
#: datasets_list_formatter.py:84
msgid "Total providers"
msgstr ""

#: datasets_list_formatter.py:101
#: datasets_list_formatter.py:87
msgid "Total number of indicators"
msgstr ""

#: datasets_list_formatter.py:107
msgid "Official"
msgstr ""

#: datasets_list_formatter.py:102
#: datasets_list_formatter.py:108
msgid "Unofficial"
msgstr ""

#: datasets_list_formatter.py:114
#: datasets_list_formatter.py:120
msgid "By provider"
msgstr ""

Expand Down
Binary file modified statgpt/app/utils/formatters/locales/en/LC_MESSAGES/dataset.mo
Binary file not shown.
38 changes: 23 additions & 15 deletions statgpt/app/utils/formatters/locales/en/LC_MESSAGES/dataset.po
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ msgid ""
msgstr ""
"Project-Id-Version: statgpt 1.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-05 17:19+0200\n"
"POT-Creation-Date: 2026-03-06 14:48+0200\n"
"PO-Revision-Date: 2024-01-01 00:00+0000\n"
"Last-Translator: \n"
"Language-Team: English\n"
Expand All @@ -15,24 +15,28 @@ msgstr ""
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"

#: dataset_simple.py:16
#: dataset_simple.py:18
msgid "[Official]"
msgstr "[Official]"

# Basic labels
#: dataset_simple.py:28 dataset_query.py:125 dataset_query.py:247
#: dataset_simple.py:30 dataset_query.py:125 dataset_query.py:247
#: dataset_availablity_query.py:105
msgid "ID"
msgstr "ID"

#: dataset_simple.py:32
#: dataset_simple.py:34
msgid "Internal ID"
msgstr "Internal ID"

#: dataset_simple.py:41 dataset_detailed.py:59 dataset_detailed.py:93
#: dataset_simple.py:43 dataset_detailed.py:59 dataset_detailed.py:93
msgid "Description"
msgstr "Description"

#: dataset_simple.py:56
msgid "Number of indicators"
msgstr "Number of indicators"

#: dataset_detailed.py:21 dataset_detailed.py:31
msgid "Total"
msgstr "Total"
Expand Down Expand Up @@ -63,40 +67,44 @@ msgid "Attributes"
msgstr "Attributes"

# List formatter strings
#: datasets_list_formatter.py:39
#: datasets_list_formatter.py:40
msgid "Cannot sort by both id and name."
msgstr "Cannot sort by both id and name."

#: datasets_list_formatter.py:62 datasets_list_formatter.py:64 citation.py:28
#: datasets_list_formatter.py:64 datasets_list_formatter.py:66 citation.py:28
msgid "Provider"
msgstr "Provider"

#: datasets_list_formatter.py:64 datasets_list_formatter.py:110
#: datasets_list_formatter.py:66 datasets_list_formatter.py:116
msgid "Unknown"
msgstr "Unknown"

#: datasets_list_formatter.py:67
#: datasets_list_formatter.py:69
msgid "Total datasets from this provider"
msgstr "Total datasets from this provider"

#: datasets_list_formatter.py:78 datasets_list_formatter.py:100
#: datasets_list_formatter.py:105
#: datasets_list_formatter.py:80 datasets_list_formatter.py:106
#: datasets_list_formatter.py:111
msgid "Total datasets"
msgstr "Total datasets"

#: datasets_list_formatter.py:82
#: datasets_list_formatter.py:84
msgid "Total providers"
msgstr "Total providers"

#: datasets_list_formatter.py:101
#: datasets_list_formatter.py:87
msgid "Total number of indicators"
msgstr "Total number of indicators"

#: datasets_list_formatter.py:107
msgid "Official"
msgstr "Official"

#: datasets_list_formatter.py:102
#: datasets_list_formatter.py:108
msgid "Unofficial"
msgstr "Unofficial"

#: datasets_list_formatter.py:114
#: datasets_list_formatter.py:120
msgid "By provider"
msgstr "By provider"

Expand Down
Binary file modified statgpt/app/utils/formatters/locales/uk/LC_MESSAGES/dataset.mo
Binary file not shown.
Loading
Loading