diff --git a/recce/cli.py b/recce/cli.py
index 7fc742bd0..9c6362564 100644
--- a/recce/cli.py
+++ b/recce/cli.py
@@ -1,11 +1,14 @@
 import asyncio
 import os
+import sys
 from pathlib import Path
 from typing import List
 
 import click
 import uvicorn
 from click import Abort
+import yaml
+import shutil
 
 from recce import event
 from recce.artifact import download_dbt_artifacts, upload_dbt_artifacts
@@ -173,6 +176,10 @@ def cli(ctx, **kwargs):
         )
         error_console.print("Please update using the command: 'pip install --upgrade recce'.", end="\n\n")
+    # Register init and validate commands
+    cli.add_command(init)
+    cli.add_command(validate)
+
 
 
 @cli.command(cls=TrackCommand)
 def version():
@@ -299,7 +306,7 @@ def debug(**kwargs):
     else:
         if not base_manifest_is_ready:
             console.print(
-                "[[orange3]TIP[/orange3]] `dbt run --target-path target-base` to generate the manifest JSON file for the base environment"
+                "[[orange3]TIP[/orange3]] `dbt docs generate --target-path target-base` to generate the manifest JSON file for the base environment"
             )
         if not base_catalog_is_ready:
             console.print(
@@ -461,6 +468,19 @@ def server(host, port, lifetime, state_file=None, **kwargs):
                 f"'{target_base_path}'."
             )
             console.print("https://docs.datarecce.io/get-started/#prepare-dbt-artifacts")
+            console.print("\n[blue]To use Recce, we need to generate dbt metadata files (called 'artifacts') for both your development and production environments.[/blue]")
+            console.print("\n[blue]Step 1: Generate development artifacts[/blue]")
+            console.print("To run in your dev branch:")
+            console.print("    dbt docs generate --target dev")
+            console.print("    # This creates metadata about your current development environment\n")
+            console.print("[blue]Step 2: Generate production artifacts[/blue]")
+            console.print("To run in your main or production branch:")
+            console.print("    dbt docs generate --target prod --target-path target-base\n")
+            console.print("[blue]After running these commands, you should see:[/blue]")
+            console.print("  - A 'target' folder with manifest.json (development)")
+            console.print("  - A 'target-base' folder with manifest.json (production)\n")
+            console.print("For more information on setting up Snowflake profiles, visit: https://docs.getdbt.com/docs/core/connect-data-platform/snowflake-setup")
+            console.print("See the Recce docs for more details: https://docs.datarecce.io/")
             console.print()
 
     state_loader = create_state_loader(is_review, is_cloud, state_file, cloud_options)
@@ -1197,5 +1217,409 @@ def read_only(host, port, lifetime, state_file=None, **kwargs):
     uvicorn.run(app, host=host, port=port, lifespan="on")
+
+
+def validate_profiles_config(profiles_config, database_type):
+    """Validate the generated profiles configuration."""
+    from rich.console import Console
+    console = Console()
+
+    required_fields = {
+        'snowflake': ['account', 'user', 'password', 'warehouse', 'role'],
+        'bigquery': ['method', 'project', 'dataset', 'location'],
+        'postgres': ['host', 'port', 'user', 'password', 'dbname'],
+        'duckdb': ['path', 'threads']
+    }
+
+    # Check if the project name is still the default
+    if 'your_project_name' in profiles_config:
+        console.print("[yellow]⚠️ Warning: Using default project name 'your_project_name'. Please update it in profiles.yml[/yellow]")
+
+    # Validate each environment
+    for env in ['dev', 'prod']:
+        if env not in profiles_config.get('your_project_name', {}).get('outputs', {}):
+            console.print(f"[red]❌ Error: Missing {env} environment configuration[/red]")
+            return False
+
+        config = profiles_config['your_project_name']['outputs'][env]
+
+        # Check required fields
+        missing_fields = [field for field in required_fields[database_type]
+                          if field not in config or config[field] == f'your_{field}']
+
+        if missing_fields:
+            console.print(f"[red]❌ Error: Missing or using default values for required fields in {env} environment:[/red]")
+            for field in missing_fields:
+                console.print(f"  - {field}")
+            return False
+
+        # Type-specific validations
+        if database_type == 'postgres':
+            try:
+                port = int(config['port'])
+                if not (1 <= port <= 65535):
+                    console.print(f"[red]❌ Error: Invalid port number in {env} environment[/red]")
+                    return False
+            except ValueError:
+                console.print(f"[red]❌ Error: Port must be a number in {env} environment[/red]")
+                return False
+
+        elif database_type == 'duckdb':
+            if not config['path'].endswith('.duckdb'):
+                console.print(f"[yellow]⚠️ Warning: DuckDB path should end with .duckdb in {env} environment[/yellow]")
+
+    return True
+
+
+def test_database_connection(profiles_config, database_type, env='dev'):
+    """Test the database connection using the provided configuration."""
+    from rich.console import Console
+    from dbt.cli.main import dbtRunner, dbtRunnerResult
+    import tempfile
+    import os
+
+    console = Console()
+
+    # Create a temporary dbt project for testing
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create a minimal dbt_project.yml
+        project_yml = """
+name: 'connection_test'
+version: '1.0.0'
+config-version: 2
+profile: 'your_project_name'
+"""
+        with open(os.path.join(temp_dir, 'dbt_project.yml'), 'w') as f:
+            f.write(project_yml)
+
+        # Create a minimal model
+        os.makedirs(os.path.join(temp_dir, 'models'), exist_ok=True)
+        model_sql = """
+select 1 as test
+"""
+        with open(os.path.join(temp_dir, 'models', 'test.sql'), 'w') as f:
+            f.write(model_sql)
+
+        # Write the profiles configuration
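+        # Note: this writes to the user's global ~/.dbt/profiles.yml and will overwrite any existing file there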
+        os.makedirs(os.path.expanduser('~/.dbt'), exist_ok=True)
+        with open(os.path.expanduser('~/.dbt/profiles.yml'), 'w') as f:
+            yaml.dump(profiles_config, f, default_flow_style=False)
+
+        # Test the connection
+        console.print(f"\n[blue]Testing {env} environment connection...[/blue]")
+        dbt = dbtRunner()
+        res: dbtRunnerResult = dbt.invoke(['debug', '--project-dir', temp_dir, '--target', env])
+
+        if res.success:
+            console.print(f"[green]✅ Successfully connected to {env} environment[/green]")
+            return True
+        else:
+            console.print(f"[red]❌ Failed to connect to {env} environment[/red]")
+            console.print(f"Error: {res.exception}")
+            return False
+
+
+@cli.command(cls=TrackCommand)
+@click.option('--target', type=click.Choice(['dev', 'prod']), default='dev', help='Target environment to generate docs for')
+@click.option('--target-path', type=str, default='target', help='Path to store the generated docs')
+@add_options(dbt_related_options)
+def generate_docs(target, target_path, **kwargs):
+    """
+    Automatically generate dbt docs for the specified target environment.
+    """
+    from rich.console import Console
+    import subprocess
+
+    console = Console()
+
+    # Construct the command
+    command = ['dbt', 'docs', 'generate', '--target', target]
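+    # For the prod target, artifacts are written to a separate path (e.g. 'target-base') so they are not overwritten by later dev runs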
+ """ + from rich.console import Console + import subprocess + import os + + console = Console() + + # Construct the command + command = ['dbt', 'docs', 'generate', '--target', target] + if target == 'prod': + command.extend(['--target-path', target_path]) + + # Execute the command + console.print(f"[blue]Generating dbt docs for {target} environment...[/blue]") + result = subprocess.run(command, capture_output=True, text=True) + + if result.returncode == 0: + console.print(f"[green]✅ Successfully generated dbt docs for {target} environment[/green]") + else: + console.print(f"[red]❌ Failed to generate dbt docs: {result.stderr}[/red]") + + +@cli.command(cls=TrackCommand) +@click.option('--database-type', type=click.Choice(['snowflake', 'bigquery', 'postgres', 'duckdb']), prompt='What type of database are you using?') +@click.option('--dev-schema', prompt='What schema name would you like to use for development?', default='dev') +@click.option('--prod-schema', prompt='What schema name would you like to use for production?', default='prod') +@click.option('--validate', is_flag=True, help='Validate the configuration and test database connections') +@click.option('--explain', is_flag=True, help='Show detailed explanation of the initialization process') +@add_options(dbt_related_options) +def init(database_type, dev_schema, prod_schema, validate, explain, **kwargs): + """Initialize a new dbt project configuration.""" + from rich.console import Console + console = Console() + + # If --explain flag is used, show only the explanation and return + if explain: + console.print("\n[blue]To use Recce, we need to generate dbt metadata files (called 'artifacts') for both your development and production environments.[/blue]") + console.print("\n[blue]Step 1: Generate development artifacts[/blue]") + console.print("To run in your dev branch:") + console.print(" dbt docs generate --target dev") + console.print(" # This creates metadata about your current development environment\n") + console.print("[blue]Step 2: Generate production artifacts[/blue]") + console.print("To run in your main or production branch:") + console.print(" dbt docs generate --target prod --target-path target-base\n") + console.print("[blue]After running these commands, you should see:[/blue]") + console.print(" - A 'target' folder with manifest.json (development)") + console.print(" - A 'target-base' folder with manifest.json (production)\n") + console.print("For more information on setting up Snowflake profiles, visit: https://docs.getdbt.com/docs/core/connect-data-platform/snowflake-setup") + console.print("See the Recce docs for more details: https://docs.datarecce.io/") + return + + # Only prompt for database type if not using --explain + if not database_type: + database_type = click.prompt('What type of database are you using?', type=click.Choice(['snowflake', 'bigquery', 'postgres', 'duckdb'])) + if not dev_schema: + dev_schema = click.prompt('What schema name would you like to use for development?', default='dev') + if not prod_schema: + prod_schema = click.prompt('What schema name would you like to use for production?', default='prod') + + # Use the simpler approach for setting directories + project_dir = os.path.abspath(kwargs.get('project_dir') or os.getcwd()) + profiles_dir = os.path.abspath(kwargs.get('profiles_dir') or project_dir) + + # Create profiles directory if it doesn't exist + if not os.path.exists(profiles_dir): + os.makedirs(profiles_dir) + + # Create profiles.yml + profiles_path = os.path.join(profiles_dir, 
+    if database_type == 'duckdb':
+        config[project_name]['outputs']['dev'].update({
+            'path': 'jaffle_shop.duckdb',
+            'threads': 24
+        })
+        config[project_name]['outputs']['prod'].update({
+            'path': 'jaffle_shop.duckdb',
+            'threads': 24
+        })
+    elif database_type == 'snowflake':
+        config[project_name]['outputs']['dev'].update({
+            'account': 'your_account',
+            'user': 'your_username',
+            'password': 'your_password',
+            'warehouse': 'your_warehouse',
+            'role': 'your_role'
+        })
+        config[project_name]['outputs']['prod'].update({
+            'account': 'your_account',
+            'user': 'your_username',
+            'password': 'your_password',
+            'warehouse': 'your_warehouse',
+            'role': 'your_role'
+        })
+    # Add other database types as needed
+
+    # Handle existing profiles.yml
+    if os.path.exists(profiles_path):
+        console.print(f"[yellow]profiles.yml already exists at {profiles_path}.[/yellow]")
+        choice = input("What would you like to do? [s]kip/[b]ackup/[o]verwrite/[a]bort: ").strip().lower()
+        if choice == 's' or choice == '':
+            console.print("Skipping creation of profiles.yml.\n")
+        elif choice == 'b':
+            backup_path = profiles_path + ".bak"
+            shutil.copy2(profiles_path, backup_path)
+            console.print(f"Backed up existing profiles.yml to {backup_path}")
+            with open(profiles_path, 'w') as f:
+                yaml.dump(config, f, default_flow_style=False)
+            console.print(f"\n✅ Created new profiles.yml at {profiles_path}")
+        elif choice == 'o':
+            console.print("Overwriting existing profiles.yml.")
+            with open(profiles_path, 'w') as f:
+                yaml.dump(config, f, default_flow_style=False)
+            console.print(f"\n✅ Overwrote profiles.yml at {profiles_path}")
+        else:
+            console.print("Aborted by user.")
+            return
+    else:
+        with open(profiles_path, 'w') as f:
+            yaml.dump(config, f, default_flow_style=False)
+        console.print(f"\n✅ Created profiles.yml at {profiles_path}")
+
+    # Validate if requested
+    if validate:
+        try:
+            if not validate_profiles_config(config, database_type):
+                console.print("\n⚠️ Please fix the configuration issues in profiles.yml before proceeding")
+            elif test_database_connection(config, database_type):
+                console.print("\n✅ Configuration validation complete")
+            else:
+                console.print("\n⚠️ Configuration validation failed. Please check your database connection settings.")
+        except Exception as e:
+            console.print(f"\n❌ Error during validation: {str(e)}")
+            console.print("\n⚠️ Please fix the configuration issues in profiles.yml before proceeding")
+
+    # Check for artifacts
+    dev_manifest = os.path.join(project_dir, 'target', 'manifest.json')
+    prod_manifest = os.path.join(project_dir, 'target-base', 'manifest.json')
+    missing = []
+    if not os.path.exists(dev_manifest):
+        missing.append('target/manifest.json (dev artifacts)')
+    if not os.path.exists(prod_manifest):
+        missing.append('target-base/manifest.json (prod/base artifacts)')
+    if missing:
+        console.print("\n[yellow]Some required dbt artifacts are missing:[/yellow]")
+        for m in missing:
+            console.print(f"  - {m}")
+        console.print("\nTo generate them, you'll need:")
+        console.print("To run in your dev branch:")
+        console.print("    dbt docs generate --target dev")
+        console.print("    # This creates metadata about your current development environment")
+        console.print("To run in your main or production branch:")
+        console.print("    dbt docs generate --target prod --target-path target-base")
+        console.print("    # This creates metadata about your production environment")
+        console.print("For a detailed explanation of this process, run: recce explain-setup")
+        console.print("See the Recce docs for more details: https://docs.datarecce.io/")
+    else:
+        console.print("\n[green]All required dbt artifacts are present! You're ready to use Recce.[/green]")
+
+
+@cli.command(cls=TrackCommand)
+@click.option('--database-type', type=click.Choice(['snowflake', 'bigquery', 'postgres', 'duckdb']), help='Specify database type for validation')
+@click.option('--test-connection', is_flag=True, help='Test database connections')
+@add_options(dbt_related_options)
+def validate(database_type, test_connection, **kwargs):
+    """
+    Validate your existing Recce and dbt configuration.
+
+    This command checks your profiles.yml and tests database connections without making any changes.
+    """
+    from rich.console import Console
+
+    console = Console()
+
+    # Get the profiles directory
+    profiles_dir = kwargs.get('profiles_dir') or os.path.expanduser('~/.dbt')
+    profiles_path = os.path.join(profiles_dir, 'profiles.yml')
+
+    if not os.path.exists(profiles_path):
+        console.print(f"[red]❌ Error: profiles.yml not found at {profiles_path}[/red]")
+        console.print("\nTo create a new configuration, run: recce init")
+        return
+
+    # Load and validate profiles.yml
+    try:
+        with open(profiles_path, 'r') as f:
+            profiles_config = yaml.safe_load(f)
+    except Exception as e:
+        console.print(f"[red]❌ Error: Failed to load profiles.yml: {str(e)}[/red]")
+        return
+
+    if not profiles_config:
+        console.print("[red]❌ Error: profiles.yml is empty[/red]")
+        return
+
+    # If the database type is not specified, try to detect it
+    if not database_type:
+        # Get the first project's configuration
+        first_project = next(iter(profiles_config.values()))
+        if isinstance(first_project, dict) and 'outputs' in first_project:
+            first_env = next(iter(first_project['outputs'].values()))
+            if isinstance(first_env, dict) and 'type' in first_env:
+                database_type = first_env['type']
+                console.print(f"[blue]Detected database type: {database_type}[/blue]")
+
+    if not database_type:
+        console.print("[red]❌ Error: Could not detect database type. Please specify --database-type[/red]")
+        return
+
+    # Validate configuration
+    console.print("\n[blue]Validating configuration...[/blue]")
+    if not validate_profiles_config(profiles_config, database_type):
+        console.print("\n[yellow]⚠️ Configuration validation failed. Please fix the issues in profiles.yml[/yellow]")
+        return
+
+    # Test connections if requested
+    if test_connection:
+        console.print("\n[blue]Testing database connections...[/blue]")
+
+        # Get the project name from dbt_project.yml if available
+        project_dir = kwargs.get('project_dir') or os.getcwd()
+        project_name = None
+        try:
+            with open(os.path.join(project_dir, 'dbt_project.yml'), 'r') as f:
+                project_config = yaml.safe_load(f)
+                project_name = project_config.get('name')
+        except Exception:
+            pass
+
+        if not project_name:
+            # Use the first project name from profiles.yml
+            project_name = next(iter(profiles_config.keys()))
+
+        # Test each environment
+        for env in ['dev', 'prod']:
+            if env not in profiles_config.get(project_name, {}).get('outputs', {}):
+                console.print(f"[yellow]⚠️ Warning: {env} environment not found in profiles.yml[/yellow]")
+                continue
+
+            if not test_database_connection(profiles_config, database_type, env):
+                console.print(f"[red]❌ Failed to connect to {env} environment[/red]")
+            else:
+                console.print(f"[green]✅ Successfully connected to {env} environment[/green]")
+
+    console.print("\n[green]✅ Configuration validation complete![/green]")
+    console.print("\nIf you need to make changes:")
+    console.print("1. Edit profiles.yml with your database credentials")
+    console.print("2. Run 'recce validate' again to check your changes")
+    console.print("3. Run 'recce server' to start Recce")
+
+
+@cli.command()
+def explain_setup():
+    """Show a detailed explanation of the Recce onboarding and artifact setup process."""
+    from rich.console import Console
+    console = Console()
+    console.print("\n[blue]To use Recce, we need to generate dbt metadata files (called 'artifacts') for both your development and production environments.[/blue]")
+    console.print("\n[blue]Step 1: Generate development artifacts[/blue]")
+    console.print("To run in your dev branch:")
+    console.print("    dbt docs generate --target dev")
+    console.print("    # This creates metadata about your current development environment")
+    console.print("\n[blue]Step 2: Generate production artifacts[/blue]")
+    console.print("To run in your main or production branch:")
+    console.print("    dbt docs generate --target prod --target-path target-base")
+    console.print("    # This creates metadata about your production environment")
+    console.print("\n[blue]After running these commands, you should see:[/blue]")
+    console.print("  - A 'target' folder with manifest.json (development)")
+    console.print("  - A 'target-base' folder with manifest.json (production)\n")
+    console.print("For more information on setting up Snowflake profiles, visit: https://docs.getdbt.com/docs/core/connect-data-platform/snowflake-setup")
+    console.print("See the Recce docs for more details: https://docs.datarecce.io/")
 
 
 if __name__ == "__main__":
     cli()
diff --git a/tests/requirements-test.txt b/tests/requirements-test.txt
new file mode 100644
index 000000000..5ed8a70aa
--- /dev/null
+++ b/tests/requirements-test.txt
@@ -0,0 +1,5 @@
+pytest>=7.0.0
+pytest-cov>=4.0.0
+click>=8.0.0
+pyyaml>=6.0.0
+rich>=10.0.0
\ No newline at end of file
diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py
new file mode 100644
index 000000000..f1af8636d
--- /dev/null
+++ b/tests/test_cli_commands.py
@@ -0,0 +1,161 @@
+import os
+import tempfile
+import yaml
+import pytest
+from click.testing import CliRunner
+from recce.cli import cli, init, validate
+
+
+@pytest.fixture
+def runner():
+    return CliRunner()
+
+
+@pytest.fixture
+def temp_profiles_dir(tmp_path):
+    return str(tmp_path)
+
+
+@pytest.fixture
+def temp_project_dir(tmp_path):
+    return str(tmp_path)
+
+
+def test_init_command(runner, temp_profiles_dir, temp_project_dir):
+    """Test the init command creates a valid profiles.yml file."""
+    result = runner.invoke(init, [
+        '--database-type', 'snowflake',
+        '--profiles-dir', temp_profiles_dir,
+        '--project-dir', temp_project_dir
+    ])
+    assert result.exit_code == 0
+    assert "Created profiles.yml" in result.output
+
+    # Verify the file was created
+    profiles_path = os.path.join(temp_profiles_dir, 'profiles.yml')
+    assert os.path.exists(profiles_path)
+
+    # Verify the content
+    with open(profiles_path, 'r') as f:
+        config = yaml.safe_load(f)
+        assert 'your_project_name' in config
+        assert 'outputs' in config['your_project_name']
+        assert 'dev' in config['your_project_name']['outputs']
+        assert 'prod' in config['your_project_name']['outputs']
+
+
+def test_validate_command(runner, temp_profiles_dir, temp_project_dir):
+    """Test the validate command with different scenarios."""
+    # First create a valid configuration
+    runner.invoke(init, [
+        '--database-type', 'snowflake',
+        '--profiles-dir', temp_profiles_dir,
+        '--project-dir', temp_project_dir
+    ])
+
+    # Test validation without connection testing
+    result = runner.invoke(validate, [
+        '--database-type', 'snowflake',
+        '--profiles-dir', temp_profiles_dir,
+        '--project-dir', temp_project_dir
+    ])
+    assert result.exit_code == 0
+    assert "Configuration validation failed" in result.output
+    assert "Missing or using default values for required fields" in result.output
+
+
+def test_validate_command_missing_profiles(runner, temp_profiles_dir):
+    """Test validate command with missing profiles.yml."""
+    result = runner.invoke(validate, [
+        '--database-type', 'snowflake',
+        '--profiles-dir', temp_profiles_dir
+    ])
+    assert result.exit_code == 0
+    assert "profiles.yml not found" in result.output
+
+
+def test_validate_command_invalid_profiles(runner, temp_profiles_dir):
+    """Test validate command with invalid profiles.yml."""
+    # Create an invalid profiles.yml
+    profiles_path = os.path.join(temp_profiles_dir, 'profiles.yml')
+    with open(profiles_path, 'w') as f:
+        f.write('invalid: yaml: content:')
+
+    result = runner.invoke(validate, [
+        '--database-type', 'snowflake',
+        '--profiles-dir', temp_profiles_dir
+    ])
+    assert result.exit_code == 0
+    assert "Failed to load profiles.yml" in result.output
+
+
+def test_validate_command_auto_detect_db_type(runner, temp_profiles_dir):
+    """Test validate command with auto-detection of database type."""
+    # Create a valid profiles.yml with Snowflake configuration
+    profiles_path = os.path.join(temp_profiles_dir, 'profiles.yml')
+    config = {
+        'test_project': {
+            'outputs': {
+                'dev': {
+                    'type': 'snowflake',
+                    'schema': 'dev',
+                    'account': 'test',
+                    'user': 'test',
+                    'password': 'test',
+                    'warehouse': 'test',
+                    'role': 'test'
+                },
+                'prod': {
+                    'type': 'snowflake',
+                    'schema': 'prod',
+                    'account': 'test',
+                    'user': 'test',
+                    'password': 'test',
+                    'warehouse': 'test',
+                    'role': 'test'
+                }
+            }
+        }
+    }
+    with open(profiles_path, 'w') as f:
+        yaml.dump(config, f)
+
+    result = runner.invoke(validate, [
+        '--profiles-dir', temp_profiles_dir
+    ])
+    assert result.exit_code == 0
+    assert "Detected database type: snowflake" in result.output
+
+
+def test_init_command_validation(runner, temp_profiles_dir, temp_project_dir):
+    """Test init command with validation flag."""
+    result = runner.invoke(init, [
+        '--database-type', 'snowflake',
+        '--validate',
+        '--profiles-dir', temp_profiles_dir,
+        '--project-dir', temp_project_dir
+    ])
+    assert result.exit_code == 0
+    assert "Missing or using default values for required fields" in result.output
+    assert "Please fix the configuration issues" in result.output
+
+
+def test_validate_command_missing_env(runner, temp_profiles_dir):
+    """Test validate command with missing environment configuration."""
+    # Create profiles.yml with only a dev environment
+    profiles_path = os.path.join(temp_profiles_dir, 'profiles.yml')
+    config = {
+        'test_project': {
+            'outputs': {
+                'dev': {
+                    'type': 'snowflake',
+                    'schema': 'dev',
+                    'account': 'test',
+                    'user': 'test',
+                    'password': 'test',
+                    'warehouse': 'test',
+                    'role': 'test'
+                }
+            }
+        }
+    }
+    with open(profiles_path, 'w') as f:
+        yaml.dump(config, f)
+
+    result = runner.invoke(validate, [
+        '--database-type', 'snowflake',
+        '--test-connection',
+        '--profiles-dir', temp_profiles_dir
+    ])
+    assert result.exit_code == 0
+    assert "Missing dev environment configuration" in result.output
\ No newline at end of file