Skip to content

Commit 623fd47

Browse files
Merge pull request #31 from DataKitchen/release/3.7.1
Release/3.7.1
2 parents c8dd8dc + 31e7763 commit 623fd47

File tree

139 files changed

+4589
-1369
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

139 files changed

+4589
-1369
lines changed

deploy/install_arrow.sh

Lines changed: 0 additions & 50 deletions
This file was deleted.

deploy/install_linuxodbc.sh

Lines changed: 0 additions & 32 deletions
This file was deleted.

deploy/testgen-base.dockerfile

Lines changed: 22 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -6,77 +6,41 @@ ENV PYTHONDONTWRITEBYTECODE=1
66
ENV PYTHONFAULTHANDLER=1
77
ENV ACCEPT_EULA=Y
88

9-
RUN apk update && apk add --no-cache \
9+
RUN apk update && apk upgrade && apk add --no-cache \
10+
# Tools needed for building the python wheels
1011
gcc \
1112
g++ \
12-
bash \
13-
libffi-dev \
14-
openssl-dev \
15-
cargo \
16-
musl-dev \
17-
postgresql-dev \
13+
make \
1814
cmake \
19-
rust \
20-
linux-headers \
21-
libc-dev \
22-
libgcc \
23-
libstdc++ \
24-
ca-certificates \
25-
zlib-dev \
26-
bzip2-dev \
27-
xz-dev \
28-
lz4-dev \
29-
zstd-dev \
30-
snappy-dev \
31-
brotli-dev \
32-
build-base \
33-
autoconf \
34-
boost-dev \
35-
flex \
36-
libxml2-dev \
37-
libxslt-dev \
38-
libjpeg-turbo-dev \
39-
ninja \
40-
git \
41-
curl \
42-
unixodbc-dev \
43-
gpg \
44-
openssl=3.3.2-r1 \
15+
musl-dev \
4516
gfortran \
46-
openblas-dev
47-
48-
RUN mkdir /dk
17+
linux-headers=6.6-r0 \
18+
# Additional libraries needed and their dev counterparts. We add both so that we can remove
19+
# the *-dev later, keeping the libraries
20+
openblas=0.3.28-r0 \
21+
openblas-dev=0.3.28-r0 \
22+
unixodbc=2.3.12-r0 \
23+
unixodbc-dev=2.3.12-r0
4924

50-
COPY --chmod=775 ./deploy/install_linuxodbc.sh /tmp/dk/install_linuxodbc.sh
51-
RUN /tmp/dk/install_linuxodbc.sh
52-
53-
COPY --chmod=775 ./deploy/install_arrow.sh /tmp/dk/install_arrow.sh
54-
RUN /tmp/dk/install_arrow.sh
25+
RUN apk add --no-cache \
26+
--repository https://dl-cdn.alpinelinux.org/alpine/v3.21/community \
27+
--repository https://dl-cdn.alpinelinux.org/alpine/v3.21/main \
28+
libarrow=18.1.0-r0 \
29+
apache-arrow-dev=18.1.0-r0
5530

5631
# Install TestGen's main project empty pyproject.toml to install (and cache) the dependencies first
5732
COPY ./pyproject.toml /tmp/dk/pyproject.toml
33+
RUN mkdir /dk
5834
RUN python3 -m pip install --prefix=/dk /tmp/dk
5935

6036
RUN apk del \
6137
gcc \
6238
g++ \
63-
bash \
64-
libffi-dev \
65-
openssl-dev \
66-
cargo \
67-
musl-dev \
68-
postgresql-dev \
39+
make \
6940
cmake \
70-
rust \
41+
musl-dev \
42+
gfortran \
7143
linux-headers \
72-
libc-dev \
73-
build-base \
74-
autoconf \
75-
boost-dev \
76-
flex \
77-
ninja \
78-
curl \
44+
openblas-dev \
7945
unixodbc-dev \
80-
gpg \
81-
ca-certificates \
82-
git
46+
apache-arrow-dev

deploy/testgen.dockerfile

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,30 @@
1-
ARG TESTGEN_BASE_LABEL=v1
1+
ARG TESTGEN_BASE_LABEL=v3
22

3-
FROM datakitchen/dataops-testgen-base:${TESTGEN_BASE_LABEL} AS build-image
4-
5-
# Now install everything
6-
COPY . /tmp/dk/
7-
RUN python3 -m pip install --prefix=/dk /tmp/dk
8-
9-
FROM python:3.12.7-alpine3.20 AS release-image
3+
FROM datakitchen/dataops-testgen-base:${TESTGEN_BASE_LABEL} AS release-image
104

115
# Args have to be set in current build stage: https://github.com/moby/moby/issues/37345
126
ARG TESTGEN_VERSION
137
ARG TESTGEN_DOCKER_HUB_REPO
148

15-
RUN addgroup -S testgen && adduser -S testgen -G testgen
9+
# Now install everything
10+
COPY . /tmp/dk/
11+
RUN python3 -m pip install --prefix=/dk /tmp/dk
12+
RUN rm -Rf /tmp/dk
1613

17-
COPY --from=build-image --chown=testgen:testgen /dk/ /dk
18-
COPY --from=build-image /usr/local/lib/ /usr/local/lib
19-
COPY --from=build-image /usr/lib/ /usr/lib
20-
COPY --from=build-image /opt/microsoft/ /opt/microsoft
21-
COPY --from=build-image /etc/odbcinst.ini /etc/odbcinst.ini
14+
RUN addgroup -S testgen && adduser -S testgen -G testgen
2215

23-
# The OpenSSL upgrade is not carried from the build image, so we have to upgrade it again
24-
#RUN apk add --no-cache openssl=3.3.2-r1
16+
# Streamlit has to be able to write to these dirs
17+
RUN mkdir /var/lib/testgen
18+
RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.12/site-packages/streamlit/static
2519

2620
ENV PYTHONPATH=/dk/lib/python3.12/site-packages
27-
ENV PATH="$PATH:/dk/bin:/opt/mssql-tools/bin/"
21+
ENV PATH=$PATH:/dk/bin
2822

2923
ENV TESTGEN_VERSION=${TESTGEN_VERSION}
3024
ENV TG_RELEASE_CHECK=docker
3125
ENV TESTGEN_DOCKER_HUB_REPO=${TESTGEN_DOCKER_HUB_REPO}
3226
ENV STREAMLIT_SERVER_MAX_UPLOAD_SIZE=200
3327

34-
RUN mkdir /var/lib/testgen && chown testgen:testgen /var/lib/testgen
35-
3628
USER testgen
3729

3830
WORKDIR /dk

pyproject.toml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
88

99
[project]
1010
name = "dataops-testgen"
11-
version = "3.1.2"
11+
version = "3.7.1"
1212
description = "DataKitchen's Data Quality DataOps TestGen"
1313
authors = [
1414
{ "name" = "DataKitchen, Inc.", "email" = "[email protected]" },
@@ -31,6 +31,7 @@ dependencies = [
3131
"PyYAML==6.0.1",
3232
"click==8.1.3",
3333
"sqlalchemy==1.4.46",
34+
"databricks-sql-connector==2.9.3",
3435
"snowflake-sqlalchemy==1.6.1",
3536
"pyodbc==5.0.0",
3637
"psycopg2-binary==2.9.9",
@@ -56,17 +57,19 @@ dependencies = [
5657
"xlsxwriter==3.2.0",
5758
"psutil==5.9.8",
5859
"concurrent_log_handler==0.9.25",
59-
"cryptography==43.0.1",
60+
"cryptography==44.0.1",
6061
"validators==0.33.0",
6162
"reportlab==4.2.2",
62-
"pydantic==1.10.11",
63+
"pydantic==1.10.13",
6364
"streamlit-pydantic==0.6.0",
6465

6566
# Pinned to match the manually compiled libs or for security
66-
"pyarrow==18.0.0",
67-
"snowflake-connector-python==3.12.3",
67+
"pyarrow==18.1.0",
68+
"snowflake-connector-python==3.13.1",
6869
"matplotlib==3.9.2",
6970
"scipy==1.14.1",
71+
"tornado==6.4.2",
72+
"jinja2==3.1.6",
7073
]
7174

7275
[project.optional-dependencies]

testgen/commands/queries/execute_cat_tests_query.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ def __init__(self, strProjectCode, strTestSuiteId, strTestSuite, strSQLFlavor, m
3939
def _get_rollup_scores_sql(self) -> CRollupScoresSQL:
4040
if not self._rollup_scores_sql:
4141
self._rollup_scores_sql = CRollupScoresSQL(self.test_run_id, self.table_groups_id)
42-
42+
4343
return self._rollup_scores_sql
44-
44+
4545
def _ReplaceParms(self, strInputString):
4646
strInputString = strInputString.replace("{MAX_QUERY_CHARS}", str(self.max_query_chars))
4747
strInputString = strInputString.replace("{TEST_RUN_ID}", self.test_run_id)
@@ -51,6 +51,7 @@ def _ReplaceParms(self, strInputString):
5151
strInputString = strInputString.replace("{TABLE_GROUPS_ID}", self.table_groups_id)
5252

5353
strInputString = strInputString.replace("{SQL_FLAVOR}", self.flavor)
54+
strInputString = strInputString.replace("{ID_SEPARATOR}", "`" if self.flavor == "databricks" else '"')
5455
strInputString = strInputString.replace("{CONCAT_OPERATOR}", self.concat_operator)
5556

5657
strInputString = strInputString.replace("{SCHEMA_NAME}", self.target_schema)
@@ -71,8 +72,9 @@ def _ReplaceParms(self, strInputString):
7172

7273
strInputString = replace_templated_functions(strInputString, self.flavor)
7374

74-
# Adding escape character where ':' is referenced
75-
strInputString = strInputString.replace(":", "\\:")
75+
if self.flavor != "databricks":
76+
# Adding escape character where ':' is referenced
77+
strInputString = strInputString.replace(":", "\\:")
7678

7779
return strInputString
7880

@@ -110,12 +112,12 @@ def PushTestRunStatusUpdateSQL(self):
110112
def FinalizeTestSuiteUpdateSQL(self):
111113
strQ = self._ReplaceParms(read_template_sql_file("ex_update_test_suite.sql", "execution"))
112114
return strQ
113-
115+
114116
def CalcPrevalenceTestResultsSQL(self):
115117
return self._ReplaceParms(read_template_sql_file("ex_calc_prevalence_test_results.sql", "execution"))
116118

117119
def TestScoringRollupRunSQL(self):
118120
return self._get_rollup_scores_sql().GetRollupScoresTestRunQuery()
119-
121+
120122
def TestScoringRollupTableGroupSQL(self):
121123
return self._get_rollup_scores_sql().GetRollupScoresTestTableGroupQuery()

testgen/commands/queries/execute_tests_query.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def _ReplaceParms(self, strInputString: str):
5353
strInputString = strInputString.replace("{EXCEPTION_MESSAGE}", self.exception_message)
5454
strInputString = strInputString.replace("{START_TIME}", self.today)
5555
strInputString = strInputString.replace("{PROCESS_ID}", str(self.process_id))
56+
strInputString = strInputString.replace("{VARCHAR_TYPE}", "STRING" if self.flavor == "databricks" else "VARCHAR")
5657
strInputString = strInputString.replace(
5758
"{NOW}", date_service.get_now_as_string_with_offset(self.minutes_offset)
5859
)
@@ -88,9 +89,9 @@ def _ReplaceParms(self, strInputString: str):
8889
if parm == "subset_condition":
8990
strInputString = strInputString.replace("{SUBSET_DISPLAY}", value.replace("'", "''") if value else "")
9091

91-
92-
# Adding escape character where ':' is referenced
93-
strInputString = strInputString.replace(":", "\\:")
92+
if self.flavor != "databricks":
93+
# Adding escape character where ':' is referenced
94+
strInputString = strInputString.replace(":", "\\:")
9495

9596
return strInputString
9697

testgen/commands/run_execute_cat_tests.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
RunActionQueryList,
88
RunThreadedRetrievalQueryList,
99
WriteListToDB,
10+
date_service,
1011
)
1112

1213
LOG = logging.getLogger("testgen")
@@ -67,7 +68,11 @@ def FinalizeTestRun(clsCATExecute: CCATExecutionSQL):
6768
clsCATExecute.TestScoringRollupRunSQL(),
6869
clsCATExecute.TestScoringRollupTableGroupSQL()]
6970
RunActionQueryList(("DKTG"), lstQueries)
70-
run_refresh_score_cards_results(project_code=clsCATExecute.project_code)
71+
run_refresh_score_cards_results(
72+
project_code=clsCATExecute.project_code,
73+
add_history_entry=True,
74+
refresh_date=date_service.parse_now(clsCATExecute.run_date),
75+
)
7176

7277

7378
def run_cat_test_queries(

testgen/commands/run_execute_tests.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def run_execution_steps(project_code: str, test_suite: str, minutes_offset: int=
144144
test_exec_params["connect_by_key"],
145145
test_exec_params["private_key"],
146146
test_exec_params["private_key_passphrase"],
147+
test_exec_params["http_path"],
147148
"PROJECT",
148149
)
149150

testgen/commands/run_generate_tests.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None):
3434
dctParms["connect_by_key"],
3535
dctParms["private_key"],
3636
dctParms["private_key_passphrase"],
37+
dctParms["http_path"],
3738
"PROJECT",
3839
)
3940

0 commit comments

Comments
 (0)