From 23873eda38f61f37f5b11dab77e0c2753522ed52 Mon Sep 17 00:00:00 2001 From: Soundarya Venkatesan Date: Thu, 14 Aug 2025 14:56:06 +1000 Subject: [PATCH] [NEP-19833]: Fetching normal and shared datasets separately --- lib/superset/dashboard/bulk_delete_cascade.rb | 7 +-- lib/superset/dashboard/compare.rb | 4 +- lib/superset/dashboard/datasets/list.rb | 44 ++++++++++++------- lib/superset/dashboard/warm_up_cache.rb | 2 +- lib/superset/services/duplicate_dashboard.rb | 9 ++-- .../dashboard/bulk_delete_cascade_spec.rb | 2 +- spec/superset/dashboard/datasets/list_spec.rb | 24 +++++----- 7 files changed, 56 insertions(+), 36 deletions(-) diff --git a/lib/superset/dashboard/bulk_delete_cascade.rb b/lib/superset/dashboard/bulk_delete_cascade.rb index 61a4be8..d9936b6 100644 --- a/lib/superset/dashboard/bulk_delete_cascade.rb +++ b/lib/superset/dashboard/bulk_delete_cascade.rb @@ -9,10 +9,11 @@ module Dashboard class BulkDeleteCascade class InvalidParameterError < StandardError; end - attr_reader :dashboard_ids + attr_reader :dashboard_ids, :ignore_shared_datasets - def initialize(dashboard_ids: []) + def initialize(dashboard_ids: [], ignore_shared_datasets: false) @dashboard_ids = dashboard_ids + @ignore_shared_datasets = ignore_shared_datasets end def perform @@ -31,7 +32,7 @@ def perform private def delete_datasets(dashboard_id) - datasets_to_delete = Superset::Dashboard::Datasets::List.new(dashboard_id: dashboard_id).datasets_details.map{|d| d[:id] } + datasets_to_delete = Superset::Dashboard::Datasets::List.new(dashboard_id: dashboard_id, separate_shared_datasets: ignore_shared_datasets).datasets_details["datasets"].map{|d| d[:id] } Superset::Dataset::BulkDelete.new(dataset_ids: datasets_to_delete).perform if datasets_to_delete.any? end diff --git a/lib/superset/dashboard/compare.rb b/lib/superset/dashboard/compare.rb index cb7dfe6..db223b9 100644 --- a/lib/superset/dashboard/compare.rb +++ b/lib/superset/dashboard/compare.rb @@ -37,8 +37,8 @@ def second_dashboard def list_datasets puts "\n ====== DASHBOARD DATASETS ====== " - Superset::Dashboard::Datasets::List.new(dashboard_id: first_dashboard_id).list - Superset::Dashboard::Datasets::List.new(dashboard_id: second_dashboard_id).list + Superset::Dashboard::Datasets::List.new(dashboard_id: first_dashboard_id, include_filter_datasets: true).list + Superset::Dashboard::Datasets::List.new(dashboard_id: second_dashboard_id, include_filter_datasets: true).list end def list_charts diff --git a/lib/superset/dashboard/datasets/list.rb b/lib/superset/dashboard/datasets/list.rb index 8ebf9b9..fc1a150 100644 --- a/lib/superset/dashboard/datasets/list.rb +++ b/lib/superset/dashboard/datasets/list.rb @@ -7,15 +7,16 @@ module Superset module Dashboard module Datasets class List < Superset::Request - attr_reader :id, :include_filter_datasets # id - dashboard id + attr_reader :id, :include_filter_datasets, :separate_shared_datasets # id - dashboard id def self.call(id) self.new(id).list end - def initialize(dashboard_id:, include_filter_datasets: false) + def initialize(dashboard_id:, include_filter_datasets: false, separate_shared_datasets: false) @id = dashboard_id @include_filter_datasets = include_filter_datasets + @separate_shared_datasets = separate_shared_datasets end def perform @@ -25,7 +26,7 @@ def perform def schemas @schemas ||= begin - all_dashboard_schemas = datasets_details.map {|d| d[:schema] }.uniq + all_dashboard_schemas = datasets_details["datasets"].map {|d| d[:schema] }.uniq # For the current superset setup we will assume a dashboard datasets will point to EXACTLY one schema, their own. # if not .. we need to know about it. Potentially we could override this check if others do not consider it a problem. @@ -40,12 +41,16 @@ def datasets_details chart_datasets = result.map do |details| details.slice('id', 'datasource_name', 'schema', 'sql').merge('database' => details['database'].slice('id', 'name', 'backend')).with_indifferent_access end - return chart_datasets unless include_filter_datasets + dashboard_datasets = {'datasets' => chart_datasets, 'shared_datasets' => []} + return dashboard_datasets unless include_filter_datasets chart_dataset_ids = chart_datasets.map{|d| d['id'] } filter_dataset_ids_not_used_in_charts = filter_dataset_ids - chart_dataset_ids - return chart_datasets if filter_dataset_ids_not_used_in_charts.empty? + return dashboard_datasets if filter_dataset_ids_not_used_in_charts.empty? # returning chart and filter datasets - chart_datasets + filter_datasets(filter_dataset_ids_not_used_in_charts) + filter_datasets = filter_datasets(filter_dataset_ids_not_used_in_charts) + dashboard_datasets['datasets'] += filter_datasets[:datasets] + dashboard_datasets['shared_datasets'] += filter_datasets[:shared_datasets] + return dashboard_datasets end private @@ -55,17 +60,26 @@ def filter_dataset_ids end def filter_datasets(filter_dataset_ids_not_used_in_charts) - filter_dataset_ids_not_used_in_charts.map do |filter_dataset_id| - filter_dataset = Superset::Dataset::Get.new(filter_dataset_id).result - database_info = { - 'id' => filter_dataset['database']['id'], - 'name' => filter_dataset['database']['database_name'], - 'backend' => filter_dataset['database']['backend'] - } - filter_dataset.slice('id', 'datasource_name', 'schema', 'sql').merge('database' => database_info, 'filter_only': true).with_indifferent_access + filter_dataset_ids_not_used_in_charts.each_with_object({datasets: [], shared_datasets: []}) do |filter_dataset_id, result| + dataset = Superset::Dataset::Get.new(filter_dataset_id).result + if separate_shared_datasets && JSON.parse(dataset['extra'] || "{}")["shared"] + result[:shared_datasets] << sliced_dataset(dataset) + else + result[:datasets] << sliced_dataset(dataset) + end end end + def sliced_dataset(dataset) + database_info = { + 'id' => dataset['database']['id'], + 'name' => dataset['database']['database_name'], + 'backend' => dataset['database']['backend'] + } + dataset.slice('id', 'datasource_name', 'schema', 'sql').merge('database' => database_info, 'filter_only': true).with_indifferent_access + end + + def route "dashboard/#{id}/datasets" end @@ -75,7 +89,7 @@ def list_attributes end def rows - datasets_details.map do |d| + datasets_details["datasets"].map do |d| [ d[:id], d[:datasource_name], diff --git a/lib/superset/dashboard/warm_up_cache.rb b/lib/superset/dashboard/warm_up_cache.rb index 8fb6b2b..8135e50 100644 --- a/lib/superset/dashboard/warm_up_cache.rb +++ b/lib/superset/dashboard/warm_up_cache.rb @@ -35,7 +35,7 @@ def validate_dashboard_id end def fetch_dataset_details(dashboard_id) - Superset::Dashboard::Datasets::List.new(dashboard_id: dashboard_id).datasets_details.map { |dataset| dataset['database'].slice('name').merge(dataset.slice('datasource_name'))} + Superset::Dashboard::Datasets::List.new(dashboard_id: dashboard_id).datasets_details['datasets'].map { |dataset| dataset['database'].slice('name').merge(dataset.slice('datasource_name'))} end end end diff --git a/lib/superset/services/duplicate_dashboard.rb b/lib/superset/services/duplicate_dashboard.rb index 03e2260..976e80d 100644 --- a/lib/superset/services/duplicate_dashboard.rb +++ b/lib/superset/services/duplicate_dashboard.rb @@ -9,15 +9,16 @@ module Superset module Services class DuplicateDashboard < Superset::Request - attr_reader :source_dashboard_id, :target_schema, :target_database_id, :allowed_domains, :tags, :publish + attr_reader :source_dashboard_id, :target_schema, :target_database_id, :allowed_domains, :tags, :publish, :ignore_duplicate_datasets - def initialize(source_dashboard_id:, target_schema:, target_database_id: , allowed_domains: [], tags: [], publish: false) + def initialize(source_dashboard_id:, target_schema:, target_database_id: , allowed_domains: [], tags: [], publish: false, ignore_duplicate_datasets: false) @source_dashboard_id = source_dashboard_id @target_schema = target_schema @target_database_id = target_database_id @allowed_domains = allowed_domains @tags = tags @publish = publish + @ignore_duplicate_datasets = ignore_duplicate_datasets end def perform @@ -184,7 +185,7 @@ def new_dashboard # retrieve the datasets that will be duplicated def source_dashboard_datasets - @source_dashboard_datasets ||= Superset::Dashboard::Datasets::List.new(dashboard_id: source_dashboard_id, include_filter_datasets: true).datasets_details + @source_dashboard_datasets ||= Superset::Dashboard::Datasets::List.new(dashboard_id: source_dashboard_id, include_filter_datasets: true).datasets_details['datasets'] rescue => e raise "Unable to retrieve datasets for source dashboard #{source_dashboard_id}: #{e.message}" end @@ -205,7 +206,7 @@ def validate_params raise ValidationError, "One or more source dashboard filters point to a different schema than the dashboard charts. Identified Unpermittied Filter Dataset Ids are #{unpermitted_filter_dataset_ids.to_s}" if unpermitted_filter_dataset_ids.any? # new dataset validations - Need to be commented for EU dashboard duplication as we are using the existing datasets for the new dashboard - raise ValidationError, "DATASET NAME CONFLICT: The Target Schema #{target_schema} already has existing datasets named: #{target_schema_matching_dataset_names.join(',')}" unless target_schema_matching_dataset_names.empty? + raise ValidationError, "DATASET NAME CONFLICT: The Target Schema #{target_schema} already has existing datasets named: #{target_schema_matching_dataset_names.join(',')}" if !ignore_duplicate_datasets && target_schema_matching_dataset_names.present? validate_source_dashboard_datasets_sql_does_not_hard_code_schema # embedded allowed_domain validations diff --git a/spec/superset/dashboard/bulk_delete_cascade_spec.rb b/spec/superset/dashboard/bulk_delete_cascade_spec.rb index 5bd7838..385dff4 100644 --- a/spec/superset/dashboard/bulk_delete_cascade_spec.rb +++ b/spec/superset/dashboard/bulk_delete_cascade_spec.rb @@ -23,7 +23,7 @@ let(:dashboard_ids) { [1] } before do - allow(Superset::Dashboard::Datasets::List).to receive(:new).with(dashboard_id: 1).and_return(double(datasets_details: [{ id: 11 }, { id: 12 }])) + allow(Superset::Dashboard::Datasets::List).to receive(:new).with(dashboard_id: 1).and_return(double(datasets_details: {'datasets' => [{ id: 11 }, { id: 12 }] })) allow(Superset::Dataset::BulkDelete).to receive(:new).with(dataset_ids: [11,12]).and_return(double(perform: true)) allow(Superset::Dashboard::Charts::List).to receive(:new).with(1).and_return(double(chart_ids: [21, 22])) diff --git a/spec/superset/dashboard/datasets/list_spec.rb b/spec/superset/dashboard/datasets/list_spec.rb index b43dc97..e173985 100644 --- a/spec/superset/dashboard/datasets/list_spec.rb +++ b/spec/superset/dashboard/datasets/list_spec.rb @@ -5,12 +5,13 @@ let(:dashboard_id) { 1 } let(:include_filter_datasets) { false } let(:result) do - [ - { - 'id' => 101, - 'datasource_name' => 'Acme Forecasts', - 'database' => { 'id' => 1, 'name' => 'DB1', 'backend' => 'postgres' }, - 'schema' => 'acme', + { + 'datasets' => [ + { + 'id' => 101, + 'datasource_name' => 'Acme Forecasts', + 'database' => { 'id' => 1, 'name' => 'DB1', 'backend' => 'postgres' }, + 'schema' => 'acme', 'sql' => 'select * from acme.forecasts' }.with_indifferent_access, { @@ -21,6 +22,7 @@ 'sql' => 'select * from acme_new.forecasts' }.with_indifferent_access ] + } end before do @@ -60,10 +62,12 @@ describe '#datasets_details' do it 'returns a list of datasets' do - expect(subject.datasets_details).to eq([ - {"id"=>101, "datasource_name"=>"Acme Forecasts", "schema"=>"acme", "database"=>{"id"=>1, "name"=>"DB1", "backend"=>"postgres"}, "sql"=>"select * from acme.forecasts"}, - {"id"=>102, "datasource_name"=>"video_game_sales", "schema"=>"public", "database"=>{"id"=>2, "name"=>"examples", "backend"=>"postgres"}, "sql"=>"select * from acme_new.forecasts"} - ]) + expect(subject.datasets_details).to eq({ + 'datasets' => [ + {"id"=>101, "datasource_name"=>"Acme Forecasts", "schema"=>"acme", "database"=>{"id"=>1, "name"=>"DB1", "backend"=>"postgres"}, "sql"=>"select * from acme.forecasts"}, + {"id"=>102, "datasource_name"=>"video_game_sales", "schema"=>"public", "database"=>{"id"=>2, "name"=>"examples", "backend"=>"postgres"}, "sql"=>"select * from acme_new.forecasts"} + ] + }) end context 'returns both chart and filter datasets when include_filter_datasets is true' do