Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
22a9bbd
feat: Added python venv to gitignore
elliott-with-the-longest-name-on-github Jan 28, 2022
88dd46e
feat: Added config and sensible default values
elliott-with-the-longest-name-on-github Jan 29, 2022
b0191de
feat: Stubbed out TSQL generator
elliott-with-the-longest-name-on-github Jan 29, 2022
63ecd4f
feat: Added column definitions
elliott-with-the-longest-name-on-github Jan 29, 2022
7e2a266
feat: Added better holiday config
elliott-with-the-longest-name-on-github Jan 29, 2022
3190f4d
chore: Reorganized now that I've got a clearer structure in mind
elliott-with-the-longest-name-on-github Jan 31, 2022
ed6f081
chore: Moved generated output to its own directory
elliott-with-the-longest-name-on-github Jan 31, 2022
a5c86c6
fix: Oops, added .venv
elliott-with-the-longest-name-on-github Jan 31, 2022
2713910
feat: Finished stubbing out tsql generator
elliott-with-the-longest-name-on-github Jan 31, 2022
069e0ba
fix: Renamed object keys
elliott-with-the-longest-name-on-github Jan 31, 2022
45c6625
fix: typo
elliott-with-the-longest-name-on-github Feb 1, 2022
1f07869
feat: Extended column defs for TSQL
elliott-with-the-longest-name-on-github Feb 1, 2022
2c0c9a2
fix: Added default_factory for mutable defaults
elliott-with-the-longest-name-on-github Feb 1, 2022
500f1f0
feat: Finally figured out column extension
elliott-with-the-longest-name-on-github Feb 1, 2022
c5ab8bb
feat: Added package docs
elliott-with-the-longest-name-on-github Feb 1, 2022
c65f4fa
chore: Updated .gitignore
elliott-with-the-longest-name-on-github Feb 1, 2022
c08c508
feat: Setup script generation working
elliott-with-the-longest-name-on-github Feb 1, 2022
6970bcf
feat: Holiday insert scripts done
elliott-with-the-longest-name-on-github Feb 1, 2022
04fcef6
fix: Better naming
elliott-with-the-longest-name-on-github Feb 1, 2022
4fe2bce
feat: DimDate load script gen works
elliott-with-the-longest-name-on-github Feb 2, 2022
d9f5113
feat: Added more holiday customization
elliott-with-the-longest-name-on-github Feb 3, 2022
ddbfc55
feat: Added Black and isort
elliott-with-the-longest-name-on-github Feb 3, 2022
3bf855d
feat: Added lint action
elliott-with-the-longest-name-on-github Feb 3, 2022
17402c9
feat: Added requirements[-dev].txt
elliott-with-the-longest-name-on-github Feb 3, 2022
c33dfb9
fix: Typo
elliott-with-the-longest-name-on-github Feb 3, 2022
d30b5bd
fix: Updated timezone config
elliott-with-the-longest-name-on-github Feb 3, 2022
2abae19
feat: Reorg again for better public API
elliott-with-the-longest-name-on-github Feb 10, 2022
8b525ca
feat: DimFiscalMonth inserts
elliott-with-the-longest-name-on-github Feb 11, 2022
2d64ca7
feat: DimCalendarMonth inserts
elliott-with-the-longest-name-on-github Feb 11, 2022
89fcb83
feat: Added DimDate refresh proc
elliott-with-the-longest-name-on-github Feb 11, 2022
b69c27e
feat: Dim[Fiscal|Calendar]Month refresh procs
elliott-with-the-longest-name-on-github Feb 11, 2022
10d317e
fix: Simplification
elliott-with-the-longest-name-on-github Feb 11, 2022
0148765
fix: Simplification
elliott-with-the-longest-name-on-github Feb 11, 2022
05689f0
feat: Constraints
elliott-with-the-longest-name-on-github Feb 11, 2022
1ad513b
fix: Misc fixes from testing
elliott-with-the-longest-name-on-github Feb 12, 2022
e1a7182
feat: Better column_factory
elliott-with-the-longest-name-on-github Feb 14, 2022
8f4d1cd
fix: Excluded columns are excluded
elliott-with-the-longest-name-on-github Feb 14, 2022
c53d59b
feat: Tons of testing, still more to go
elliott-with-the-longest-name-on-github Feb 15, 2022
6c748bf
feat: Added unittest to pre-commit hooks
elliott-with-the-longest-name-on-github Feb 15, 2022
4995cbe
fix: Added pre-commit to requirements-dev
elliott-with-the-longest-name-on-github Feb 27, 2022
9cb9869
fix: Class names
elliott-with-the-longest-name-on-github Feb 27, 2022
4cdbdf4
fix: Missing unittest.main()
elliott-with-the-longest-name-on-github Feb 27, 2022
63972f6
chore: Added JetBrains .idea file to .gitignore
elliott-with-the-longest-name-on-github Mar 1, 2022
4bcd427
feat: Tests for fiscal months
elliott-with-the-longest-name-on-github Mar 1, 2022
8ec418c
feat: Tests for DimCalendarMonths
elliott-with-the-longest-name-on-github Mar 1, 2022
b15e823
feat: Config tests
elliott-with-the-longest-name-on-github Mar 1, 2022
61b5edd
feat: Versioned config
elliott-with-the-longest-name-on-github Mar 1, 2022
f553c9d
feat: config_factory tests
elliott-with-the-longest-name-on-github Mar 1, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .github/workflows/black.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: Lint

on: [push, pull_request]

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.8
- uses: psf/black@stable
- uses: jamescurtin/isort-action@master
with:
requirementsFiles: "requirements.txt requirements-dev.txt"
configuration: "--check-only --diff --profile black"
149 changes: 149 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
.idea
src/awesome_date_dimension/.venv
output/test
test.py
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
repos:
- repo: https://github.com/psf/black
rev: 22.1.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
args: ["--profile", "black"]
name: isort (python)
- repo: local
hooks:
- id: unittest
name: unittest
entry: python -m unittest discover -s tests
language: system
'types': [python]
pass_filenames: false
stages: [commit]
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

# awesome-date-dimension

A few months back, I had to create a date dimension. All of the scripts I could find publicly were missing a lot of the flags and other features I needed (especially around fiscal month handling) -- so I created one myself. This is written in T-SQL, but shouldn't be _too_ hard to port to another dialect of SQL.
Expand Down
Empty file.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from ...config import Config


def dim_calendar_month_constraints_template(config: Config) -> str:
    """Render the T-SQL that adds the key and indexes to the calendar-month table.

    The script contains one ``ALTER TABLE`` that adds a clustered primary key
    over the month start/end key columns, followed by one nonclustered index
    for the month start date column and one for the month end date column.
    Every schema, table and column name is read from
    ``config.dim_calendar_month``.

    Args:
        config: Generator configuration; only ``dim_calendar_month`` is read.

    Returns:
        The constraint script as one string, terminated by a newline.
    """
    month_conf = config.dim_calendar_month
    cols = month_conf.columns
    schema = month_conf.table_schema
    table = month_conf.table_name

    def nonclustered_index(column_name: str) -> str:
        # Index name is IDX_NC_<schema>_<table>_<column>, built on that one column.
        return (
            f"CREATE NONCLUSTERED INDEX IDX_NC_{schema}_{table}_{column_name}"
            f" ON {schema}.{table} ({column_name});"
        )

    statements = [
        f"ALTER TABLE {schema}.{table}",
        f"ADD PRIMARY KEY CLUSTERED ({cols.month_start_key.name}, {cols.month_end_key.name} ASC);",
        "",
        nonclustered_index(cols.month_start_date.name),
        nonclustered_index(cols.month_end_date.name),
        # Trailing empty entry keeps the final newline of the script.
        "",
    ]
    return "\n".join(statements)
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
from ...config import Config
from .tsql_columns import TSQLDimCalendarMonthColumns


def dim_calendar_month_insert_template(
    config: Config, columns: TSQLDimCalendarMonthColumns
) -> str:
    """Render the T-SQL INSERT that populates the calendar-month dimension.

    The generated statement has two parts:

    * a ``DistinctMonths`` CTE that groups the date dimension by its current
      month start/end columns, derives integer month start/end keys from them
      and, when holiday generation is enabled, adds one ``SUM`` per holiday
      type over that type's flag column;
    * an ``INSERT ... SELECT`` that joins the CTE back to the date dimension
      twice (aliases ``startdate`` and ``enddate``) to pick up the day-level
      attributes of the first and last day of each month.

    Args:
        config: Generator configuration. ``dim_calendar_month``, ``dim_date``
            and ``holidays`` are read.
        columns: Iterable of column objects exposing ``name``. Each becomes
            one entry in the insert column list and one entry in the select
            list. A column with an entry in the calendar-month -> date
            mapping is selected from ``startdate``/``enddate``; any other
            column is selected from the CTE (alias ``base``).

    Returns:
        The complete INSERT statement as a single string.
    """
    dcm_conf = config.dim_calendar_month
    dcm_cols = dcm_conf.columns
    dd_conf = config.dim_date
    dd_cols = dd_conf.columns
    h_conf = config.holidays

    # One monthly-count aggregate per holiday type, only when holidays are generated.
    holiday_types = h_conf.holiday_types if h_conf.generate_holidays else ()
    holiday_columndef: list[str] = [
        f"{t.generated_column_prefix}{t.generated_monthly_count_column_postfix} = SUM({t.generated_column_prefix}{t.generated_flag_column_postfix} * 1)"
        for t in holiday_types
    ]
    # The separating comma travels with the holiday columns so that an empty
    # list does not leave a dangling "," in front of FROM (invalid T-SQL).
    if holiday_columndef:
        holiday_columns_clause = ",\n" + ",\n ".join(holiday_columndef)
    else:
        holiday_columns_clause = ""

    # Calendar-month column name -> expression on the joined date-dimension rows.
    dcm_to_dd_colmap = {
        dcm_cols.month_start_date.name: f"startdate.{dd_cols.the_date.name}",
        dcm_cols.month_end_date.name: f"enddate.{dd_cols.the_date.name}",
        dcm_cols.month_start_iso_date_name.name: f"startdate.{dd_cols.iso_date_name.name}",
        dcm_cols.month_end_iso_date_name.name: f"enddate.{dd_cols.iso_date_name.name}",
        dcm_cols.month_start_iso_week_date_name.name: f"startdate.{dd_cols.iso_week_date_name.name}",
        dcm_cols.month_end_iso_week_date_name.name: f"enddate.{dd_cols.iso_week_date_name.name}",
        dcm_cols.month_start_american_date_name.name: f"startdate.{dd_cols.american_date_name.name}",
        dcm_cols.month_end_american_date_name.name: f"enddate.{dd_cols.american_date_name.name}",
        dcm_cols.month_name.name: f"startdate.{dd_cols.month_name.name}",
        dcm_cols.month_abbrev.name: f"startdate.{dd_cols.month_abbrev.name}",
        dcm_cols.month_start_year_week_name.name: f"startdate.{dd_cols.year_week_name.name}",
        dcm_cols.month_end_year_week_name.name: f"enddate.{dd_cols.year_week_name.name}",
        dcm_cols.year_month_name.name: f"startdate.{dd_cols.year_month_name.name}",
        dcm_cols.month_year_name.name: f"startdate.{dd_cols.month_year_name.name}",
        dcm_cols.year_quarter_name.name: f"startdate.{dd_cols.year_quarter_name.name}",
        dcm_cols.year.name: f"startdate.{dd_cols.year.name}",
        dcm_cols.month_start_year_week.name: f"startdate.{dd_cols.year_week.name}",
        dcm_cols.month_end_year_week.name: f"enddate.{dd_cols.year_week.name}",
        dcm_cols.year_month.name: f"startdate.{dd_cols.year_month.name}",
        dcm_cols.year_quarter.name: f"startdate.{dd_cols.year_quarter.name}",
        dcm_cols.month_start_day_of_quarter.name: f"startdate.{dd_cols.day_of_quarter.name}",
        dcm_cols.month_end_day_of_quarter.name: f"enddate.{dd_cols.day_of_quarter.name}",
        dcm_cols.month_start_day_of_year.name: f"startdate.{dd_cols.day_of_year.name}",
        dcm_cols.month_end_day_of_year.name: f"enddate.{dd_cols.day_of_year.name}",
        dcm_cols.month_start_week_of_quarter.name: f"startdate.{dd_cols.week_of_quarter.name}",
        dcm_cols.month_end_week_of_quarter.name: f"enddate.{dd_cols.week_of_quarter.name}",
        dcm_cols.month_start_week_of_year.name: f"startdate.{dd_cols.week_of_year.name}",
        dcm_cols.month_end_week_of_year.name: f"enddate.{dd_cols.week_of_year.name}",
        dcm_cols.month_of_quarter.name: f"startdate.{dd_cols.month_of_quarter.name}",
        dcm_cols.quarter.name: f"startdate.{dd_cols.quarter.name}",
        dcm_cols.days_in_month.name: f"startdate.{dd_cols.days_in_month.name}",
        dcm_cols.days_in_quarter.name: f"startdate.{dd_cols.days_in_quarter.name}",
        dcm_cols.days_in_year.name: f"startdate.{dd_cols.days_in_year.name}",
        dcm_cols.current_month_flag.name: f"startdate.{dd_cols.current_month_flag.name}",
        dcm_cols.prior_month_flag.name: f"startdate.{dd_cols.prior_month_flag.name}",
        dcm_cols.next_month_flag.name: f"startdate.{dd_cols.next_month_flag.name}",
        dcm_cols.current_quarter_flag.name: f"startdate.{dd_cols.current_quarter_flag.name}",
        dcm_cols.prior_quarter_flag.name: f"startdate.{dd_cols.prior_quarter_flag.name}",
        dcm_cols.next_quarter_flag.name: f"startdate.{dd_cols.next_quarter_flag.name}",
        dcm_cols.current_year_flag.name: f"startdate.{dd_cols.current_year_flag.name}",
        dcm_cols.prior_year_flag.name: f"startdate.{dd_cols.prior_year_flag.name}",
        dcm_cols.next_year_flag.name: f"startdate.{dd_cols.next_year_flag.name}",
        dcm_cols.first_day_of_month_flag.name: f"startdate.{dd_cols.first_day_of_month_flag.name}",
        dcm_cols.last_day_of_month_flag.name: f"startdate.{dd_cols.last_day_of_month_flag.name}",
        dcm_cols.first_day_of_quarter_flag.name: f"startdate.{dd_cols.first_day_of_quarter_flag.name}",
        dcm_cols.last_day_of_quarter_flag.name: f"startdate.{dd_cols.last_day_of_quarter_flag.name}",
        dcm_cols.first_day_of_year_flag.name: f"startdate.{dd_cols.first_day_of_year_flag.name}",
        dcm_cols.last_day_of_year_flag.name: f"startdate.{dd_cols.last_day_of_year_flag.name}",
        dcm_cols.month_start_fraction_of_quarter.name: f"startdate.{dd_cols.fraction_of_quarter.name}",
        dcm_cols.month_end_fraction_of_quarter.name: f"enddate.{dd_cols.fraction_of_quarter.name}",
        dcm_cols.month_start_fraction_of_year.name: f"startdate.{dd_cols.fraction_of_year.name}",
        dcm_cols.month_end_fraction_of_year.name: f"enddate.{dd_cols.fraction_of_year.name}",
        dcm_cols.current_quarter_start.name: f"startdate.{dd_cols.current_quarter_start.name}",
        dcm_cols.current_quarter_end.name: f"startdate.{dd_cols.current_quarter_end.name}",
        dcm_cols.current_year_start.name: f"startdate.{dd_cols.current_year_start.name}",
        dcm_cols.current_year_end.name: f"startdate.{dd_cols.current_year_end.name}",
        dcm_cols.prior_month_start.name: f"startdate.{dd_cols.prior_month_start.name}",
        dcm_cols.prior_month_end.name: f"startdate.{dd_cols.prior_month_end.name}",
        dcm_cols.prior_quarter_start.name: f"startdate.{dd_cols.prior_quarter_start.name}",
        dcm_cols.prior_quarter_end.name: f"startdate.{dd_cols.prior_quarter_end.name}",
        dcm_cols.prior_year_start.name: f"startdate.{dd_cols.prior_year_start.name}",
        dcm_cols.prior_year_end.name: f"startdate.{dd_cols.prior_year_end.name}",
        dcm_cols.next_month_start.name: f"startdate.{dd_cols.next_month_start.name}",
        dcm_cols.next_month_end.name: f"startdate.{dd_cols.next_month_end.name}",
        dcm_cols.next_quarter_start.name: f"startdate.{dd_cols.next_quarter_start.name}",
        dcm_cols.next_quarter_end.name: f"startdate.{dd_cols.next_quarter_end.name}",
        dcm_cols.next_year_start.name: f"startdate.{dd_cols.next_year_start.name}",
        dcm_cols.next_year_end.name: f"startdate.{dd_cols.next_year_end.name}",
        dcm_cols.month_start_quarterly_burnup.name: f"startdate.{dd_cols.quarterly_burnup.name}",
        dcm_cols.month_end_quarterly_burnup.name: f"enddate.{dd_cols.quarterly_burnup.name}",
        dcm_cols.month_start_yearly_burnup.name: f"startdate.{dd_cols.yearly_burnup.name}",
        dcm_cols.month_end_yearly_burnup.name: f"enddate.{dd_cols.yearly_burnup.name}",
    }

    # Read the column names once so that a one-shot iterable still feeds both
    # the insert list and the select list.
    column_names = [c.name for c in columns]
    insert_columns_clause = ",\n ".join(column_names)
    # Columns without a date-dimension mapping (the month keys and any holiday
    # counts) come straight from the CTE, aliased as "base".
    select_columns_clause = ",\n ".join(
        f"{name} = {dcm_to_dd_colmap.get(name, f'base.{name}')}"
        for name in column_names
    )

    # CONVERT style 112 formats the date as yyyymmdd before the cast to int.
    return f"""WITH DistinctMonths AS (
SELECT
{dcm_cols.month_start_key.name} = CONVERT(
int,
CONVERT(
varchar(8),
{dd_cols.current_month_start.name},
112
)
),
{dcm_cols.month_end_key.name} = CONVERT(
int,
CONVERT(
varchar(8),
{dd_cols.current_month_end.name},
112
)
){holiday_columns_clause}
FROM
{dd_conf.table_schema}.{dd_conf.table_name}
GROUP BY {dd_cols.current_month_start.name}, {dd_cols.current_month_end.name}
)

INSERT INTO {dcm_conf.table_schema}.{dcm_conf.table_name} (
{insert_columns_clause}
)
-- Yank the day-level stuff we need for both the start and end dates from {dd_conf.table_name}
SELECT
{select_columns_clause}
FROM
DistinctMonths AS base
INNER JOIN {dd_conf.table_schema}.{dd_conf.table_name} AS startdate
ON base.{dcm_cols.month_start_key.name} = startdate.{dd_cols.date_key.name}
INNER JOIN {dd_conf.table_schema}.{dd_conf.table_name} AS enddate
ON base.{dcm_cols.month_end_key.name} = enddate.{dd_cols.date_key.name};"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from ...config import Config
from .dim_calendar_month_insert_template import dim_calendar_month_insert_template
from .tsql_columns import TSQLDimCalendarMonthColumns


def dim_calendar_month_refresh_template(
    config: Config, columns: TSQLDimCalendarMonthColumns
) -> str:
    """Render the stored procedure that rebuilds the calendar-month dimension.

    The procedure body sets ``XACT_ABORT ON``, opens a transaction, truncates
    the calendar-month table, re-runs the INSERT produced by
    ``dim_calendar_month_insert_template`` and commits. On error the CATCH
    block selects the error details, rolls back if a transaction is still
    open, and rethrows.

    Args:
        config: Generator configuration; passed through to the insert template
            and used for the schema/table name of the table to truncate.
        columns: Columns to populate; passed through to the insert template.

    Returns:
        The ``CREATE PROCEDURE`` script, ending with a ``GO`` batch separator.
    """
    dcm_conf = config.dim_calendar_month
    indentation_level = " "
    insert_script = dim_calendar_month_insert_template(config, columns)
    # Prefix every line of the embedded INSERT so it sits inside the TRY block.
    indented_script = "\n".join(
        indentation_level + line for line in insert_script.split("\n")
    )
    # The TRUNCATE must hit the same table the INSERT targets; a hard-coded
    # name would empty a different table whenever the schema or table name is
    # customised in the config.
    # NOTE(review): the procedure name is still fixed to
    # dbo.sp_build_DimCalendarMonth because other scripts may call it by that
    # name -- confirm whether it should follow the configured names too.
    return f"""CREATE PROCEDURE dbo.sp_build_DimCalendarMonth AS BEGIN
SET XACT_ABORT ON;
BEGIN TRY
BEGIN TRANSACTION;

TRUNCATE TABLE {dcm_conf.table_schema}.{dcm_conf.table_name};

{indented_script}

COMMIT TRANSACTION;
END TRY
BEGIN CATCH
SELECT
ERROR_NUMBER() AS ErrorNumber,
ERROR_SEVERITY() AS ErrorSeverity,
ERROR_STATE() AS ErrorState,
ERROR_LINE () AS ErrorLine,
ERROR_PROCEDURE() AS ErrorProcedure,
ERROR_MESSAGE() AS ErrorMessage;
IF @@TRANCOUNT > 0
ROLLBACK TRANSACTION;
THROW;
END CATCH;
END
GO
"""
Loading