Skip to content

Snowflake sql ddl import to datacontract issue with sqlglot add back simple-ddl-parser with improvements #789

@dmaresma

Description

@dmaresma

Hi folks, very good job,
This issue is related with #686 , #662 and #651
I have an issue when importing Snowflake heavy feature ddl like :

CREATE TABLE IF NOT EXISTS  ${database_name}.PUBLIC.my_table (
  -- https://docs.snowflake.com/en/sql-reference/intro-summary-data-types
  field_primary_key      NUMBER(38,0) NOT NULL autoincrement start 1 increment 1 noorder COMMENT 'Primary key',
  field_not_null         INT NOT NULL     COMMENT 'Not null',
  field_char             CHAR(10)         COMMENT 'Fixed-length string',
  field_character        CHARACTER(10)    COMMENT 'Fixed-length string',
  field_varchar          VARCHAR(100) WITH TAG (SNOWFLAKE.CORE.PRIVACY_CATEGORY='IDENTIFIER', SNOWFLAKE.CORE.SEMANTIC_CATEGORY='NAME') COMMENT 'Variable-length string',
 
  field_text             TEXT             COMMENT 'Large variable-length string',
  field_string           STRING           COMMENT 'Large variable-length Unicode string',

  field_tinyint          TINYINT          COMMENT 'Integer (0-255)',
  field_smallint         SMALLINT         COMMENT 'Integer (-32,768 to 32,767)',
  field_int              INT              COMMENT 'Integer (-2.1B to 2.1B)',
  field_integer          INTEGER          COMMENT 'Integer full name(-2.1B to 2.1B)',
  field_bigint           BIGINT           COMMENT 'Large integer (-9 quintillion to 9 quintillion)',

  field_decimal          DECIMAL(10, 2)   COMMENT 'Fixed precision decimal',
  field_numeric          NUMERIC(10, 2)   COMMENT 'Same as DECIMAL',

  field_float            FLOAT            COMMENT 'Approximate floating-point',
  field_float4           FLOAT4           COMMENT 'Approximate floating-point 4',
  field_float8           FLOAT8           COMMENT 'Approximate floating-point 8',
  field_real             REAL             COMMENT 'Smaller floating-point',
 
  field_boulean          BOOLEAN          COMMENT 'Boolean-like (0 or 1)',

  field_date             DATE             COMMENT 'Date only (YYYY-MM-DD)',
  field_time             TIME             COMMENT 'Time only (HH:MM:SS)',
  field_timestamp        TIMESTAMP        COMMENT 'More precise datetime',
  field_timestamp_ltz    TIMESTAMP_LTZ    COMMENT 'More precise datetime with local time zone; time zone, if provided, isn`t stored.',
  field_timestamp_ntz    TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP() COMMENT 'More precise datetime with no time zone; time zone, if provided, isn`t stored.',
  field_timestamp_tz     TIMESTAMP_TZ     COMMENT 'More precise datetime with time zone.',

  field_binary           BINARY(16)       COMMENT 'Fixed-length binary',
  field_varbinary        VARBINARY(100)   COMMENT 'Variable-length binary',

  field_variant          VARIANT          COMMENT 'VARIANT data',
  field_json             OBJECT           COMMENT 'JSON (Stored as text)',
  UNIQUE(field_not_null),
  PRIMARY KEY (field_primary_key)
) COMMENT = 'My Comment'

in the current state the following test fail for test_import_sql_snowflake.py
`import yaml

from datacontract.data_contract import DataContract

sql_file_path = "fixtures/snowflake/import/ddl.sql"

def test_import_sql_snowflake():

result = DataContract().import_from_source("sql", sql_file_path, dialect="snowflake")

expected = """
dataContractSpecification: 1.1.0
id: my-data-contract-id
info:
title: My Data Contract
version: 0.0.1
servers:
snowflake:
type: snowflake
models:
my_table:
description: My Comment
type: table
fields:
field_primary_key:
type: decimal
required: true
description: Primary key
precision: 38
scale: 0
config:
snowflakeType: NUMBER(38,0) AUTOINCREMENT START 1 INCREMENT 1 NOORDER
field_not_null:
type: int
required: true
unique: true
description: Not null
config:
snowflakeType: INT
field_char:
type: string
description: Fixed-length string
maxLength: 10
config:
snowflakeType: CHAR(10)
field_character:
type: string
description: Fixed-length string
maxLength: 10
config:
snowflakeType: CHARACTER(10)
field_varchar:
type: string
description: Variable-length string
maxLength: 100
tags: ["SNOWFLAKE.CORE.PRIVACY_CATEGORY='IDENTIFIER'", "SNOWFLAKE.CORE.SEMANTIC_CATEGORY='NAME'"]
config:
snowflakeType: VARCHAR(100)
field_text:
type: string
description: Large variable-length string
config:
snowflakeType: TEXT
field_string:
type: string
description: Large variable-length Unicode string
config:
snowflakeType: STRING
field_tinyint:
type: int
description: Integer ( 0-255)
config:
snowflakeType: TINYINT
field_smallint:
type: int
description: Integer ( -32 , 768 to 32 , 767)
config:
snowflakeType: SMALLINT
field_int:
type: int
description: Integer ( -2.1B to 2.1B)
config:
snowflakeType: INT
field_integer:
type: int
description: Integer full name ( -2.1B to 2.1B)
config:
snowflakeType: INTEGER
field_bigint:
type: long
description: Large integer ( -9 quintillion to 9 quintillion)
config:
snowflakeType: BIGINT
field_decimal:
type: decimal
description: Fixed precision decimal
precision: 10
scale: 2
config:
snowflakeType: DECIMAL(10,2)
field_numeric:
type: decimal
description: Same as DECIMAL
precision: 10
scale: 2
config:
snowflakeType: NUMERIC(10,2)
field_float:
type: float
description: Approximate floating-point
config:
snowflakeType: FLOAT
field_float4:
type: float
description: Approximate floating-point 4
config:
snowflakeType: FLOAT4
field_float8:
type: float
description: Approximate floating-point 8
config:
snowflakeType: FLOAT8
field_real:
type: float
description: Smaller floating-point
config:
snowflakeType: REAL
field_boulean:
type: boolean
description: Boolean-like ( 0 or 1)
config:
snowflakeType: BOOLEAN
field_date:
type: date
description: Date only ( YYYY-MM-DD)
config:
snowflakeType: DATE
field_time:
type: string
description: Time only ( HH:MM:SS)
config:
snowflakeType: TIME
field_timestamp:
type: timestamp_ntz
description: More precise datetime
config:
snowflakeType: TIMESTAMP
field_timestamp_ltz:
type: timestamp_tz
description: More precise datetime with local time zone; time zone , if provided
, isnt stored. config: snowflakeType: TIMESTAMP_LTZ field_timestamp_ntz: type: timestamp_ntz description: More precise datetime with no time zone; time zone , if provided , isnt stored.
config:
snowflakeType: TIMESTAMP_NTZ
field_timestamp_tz:
type: timestamp_tz
description: More precise datetime with time zone.
config:
snowflakeType: TIMESTAMP_TZ
field_binary:
type: bytes
description: Fixed-length binary
maxLength: 16
config:
snowflakeType: BINARY(16)
field_varbinary:
type: bytes
description: Variable-length binary
maxLength: 100
config:
snowflakeType: VARBINARY(100)
field_variant:
type: object
description: VARIANT data
config:
snowflakeType: VARIANT
field_json:
type: object
description: JSON ( Stored as text)
config:
snowflakeType: OBJECT"""

print("Result", result.to_yaml())
assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected)

Disable linters so we don't get "missing description" warnings

assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed()
`

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions