Skip to content

Commit 2d18902

Browse files
authored
Merge pull request #66 from techops-recsys-lateral-hiring/devcon
Development containers
2 parents 0092c2c + a7ef3a9 commit 2d18902

File tree

3 files changed

+71
-3
lines changed

3 files changed

+71
-3
lines changed

.devcontainer/devcontainer.json

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"name": "Transformation - Python",
3+
"image": "mcr.microsoft.com/devcontainers/base:1-debian",
4+
"features": {
5+
"ghcr.io/devcontainers/features/java:1": {
6+
"version": "17",
7+
"jdkDistro": "open",
8+
"gradleVersion": "latest",
9+
"mavenVersion": "latest",
10+
"antVersion": "latest",
11+
"groovyVersion": "latest"
12+
},
13+
"ghcr.io/devcontainers/features/python:1": {
14+
"version": "3.13"
15+
},
16+
"ghcr.io/devcontainers-extra/features/poetry:2": {
17+
"version": "latest"
18+
}
19+
},
20+
"containerEnv": {
21+
"PYTHONUNBUFFERED": "1"
22+
},
23+
"postCreateCommand": "poetry install",
24+
"customizations": {
25+
"vscode": {
26+
"extensions": [
27+
"ms-python.python",
28+
"ms-python.vscode-pylance",
29+
"charliermarsh.ruff",
30+
"ms-toolsai.jupyter"
31+
],
32+
"settings": {
33+
"python.testing.pytestEnabled": true,
34+
"python.testing.pytestArgs": ["tests"]
35+
}
36+
}
37+
}
38+
}

README.md

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ These jobs are using _PySpark_ to process larger volumes of data and are suppose
1818
1919
### Local Setup
2020

21+
> 💡 Use the [Devcontainer setup](#devcontainer-setup) if you encounter issues.
22+
2123
#### Pre-requisites
2224

2325
Please make sure you have the following installed and can run them
@@ -32,12 +34,31 @@ We recommend using WSL 2 on Windows for this exercise, due to the [lack of suppo
3234

3335
Follow instructions on the [Windows official page](https://learn.microsoft.com/en-us/windows/wsl/setup/environment) and then the linux install.
3436

37+
> 💡 Use the [Devcontainer setup](#devcontainer-setup) if you encounter issues.
38+
3539
#### Install all dependencies
3640

3741
```bash
3842
poetry install
3943
```
4044

45+
### Devcontainer setup
46+
47+
Configuration for using dev containers is provided in the `.devcontainer` directory.
48+
49+
> ⚠️ This can take up to 7 minutes to set up, so make sure to have things running before the interview.
50+
51+
#### In GitHub Codespaces
52+
53+
1. [Fork](https://github.com/techops-recsys-lateral-hiring/dataengineer-transformations-python/fork) this repository.
54+
2. Follow [codespace instructions](https://docs.github.com/en/codespaces/developing-in-a-codespace/creating-a-codespace-for-a-repository#the-codespace-creation-process) from the forked repository, to create the environment.
55+
56+
#### In VSCode - Alternative
57+
58+
This requires a working local Docker setup matching your OS and licensing situation, and [VSCode](https://code.visualstudio.com/download).
59+
60+
If you have all of these, follow the instructions in the [VSCode dev containers documentation](https://code.visualstudio.com/docs/devcontainers/containers). Otherwise, consider using Codespaces.
61+
4162
### Verify setup
4263

4364
> All of the following commands should be running successfully
@@ -87,6 +108,7 @@ The following section provides context over them.
87108
```
88109
89110
/
111+
├─ /.devcontainer # Contains configurations for dev containers
90112
├─ /data_transformations # Contains the main python library
91113
│ # with the code to the transformations
92114
@@ -102,7 +124,6 @@ The following section provides context over them.
102124
│ # and the setup
103125
104126
├─ .gitignore
105-
├─ .pylintrc # configuration for pylint
106127
├─ LICENCE
107128
├─ poetry.lock
108129
├─ pyproject.toml

tests/integration/test_validate_spark_environment.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,15 @@ def __extract_version_line(java_version_output: str) -> str:
3434
(line for line in java_version_output.splitlines() if "version" in line), None
3535
)
3636
if not version_line:
37-
pytest.fail("Couldn't find version information in `java -version` output.")
37+
pytest.fail(
38+
"Couldn't find version information in `java -version` output.")
3839
return version_line
3940

4041

4142
# pylint: disable=R1710
4243
def __parse_major_version(version_line: str) -> int:
43-
version_regex = re.compile(r'version "(?P<major>\d+)\.(?P<minor>\d+)\.\w+"')
44+
version_regex = re.compile(
45+
r'version "(?P<major>\d+)\.(?P<minor>\d+)\.\w+"')
4446
match = version_regex.search(version_line)
4547
if match is not None:
4648
major_version = int(match.group("major"))
@@ -49,4 +51,11 @@ def __parse_major_version(version_line: str) -> int:
4951
# https://softwareengineering.stackexchange.com/questions/175075/why-is-java-version-1-x-referred-to-as-java-x
5052
major_version = int(match.group("minor"))
5153
return major_version
54+
55+
# Opensource versions follow an alternative system
56+
alternative_version_regex = re.compile(r'version "(?P<major>\d+)"')
57+
match = alternative_version_regex.search(version_line)
58+
if match is not None:
59+
major_version = int(match.group("major"))
60+
return major_version
5261
pytest.fail(f"Couldn't parse Java version from {version_line}.")

0 commit comments

Comments
 (0)