Skip to content

Commit 2c591d3

Browse files
liferoad and tvalentyn authored
Change src to my_project (#22)
* Change src to my_project * Update README.md Co-authored-by: tvalentyn <[email protected]> --------- Co-authored-by: tvalentyn <[email protected]>
1 parent aedddc0 commit 2c591d3

File tree

11 files changed

+31
-34
lines changed

11 files changed

+31
-34
lines changed

.isort.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import_heading_stdlib=standard libraries
1919
import_heading_thirdparty=third party libraries
2020
include_trailing_comma=True
2121
indent=' '
22-
known_dfml=src
22+
known_dfml=my_project
2323
dedup_headings=True
2424
line_length=120
2525
multi_line_output=3

Makefile

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ SILENT:
1717
.DEFAULT_GOAL := help
1818

1919
# Load environment variables from .env file
20+
TF_MODEL_URI :=
2021
include .env
2122
export
2223

@@ -64,6 +65,7 @@ init: init-venv ## Init virtual environment
6465
@./venv/bin/python3 -m pre_commit install --install-hooks --overwrite
6566
@mkdir -p beam-output
6667
@echo "use 'source venv/bin/activate' to activate venv "
68+
@./venv/bin/python3 -m pip install -e .
6769

6870
format: ## Run formatter on source code
6971
@./venv/bin/python3 -m black --config=pyproject.toml .
@@ -85,18 +87,18 @@ clean: clean-lite ## Remove virtual environment, downloaded models, etc
8587
@echo "run 'make init'"
8688

8789
test: lint ## Run tests
88-
@PYTHONPATH="./:./src" ./venv/bin/pytest -s -vv --cov=src --cov-fail-under=50 tests/
90+
./venv/bin/pytest -s -vv --cov=my_project --cov-fail-under=50 tests/
8991

9092
run-direct: ## Run a local test with DirectRunner
9193
@rm -f beam-output/beam_test_out.txt
9294
ifeq ($(MODEL_ENV), "TORCH")
93-
time ./venv/bin/python3 -m src.run \
95+
time ./venv/bin/python3 -m my_project.run \
9496
--input data/openimage_10.txt \
9597
--output beam-output/beam_test_out.txt \
9698
--model_state_dict_path $(MODEL_STATE_DICT_PATH) \
9799
--model_name $(MODEL_NAME)
98100
else
99-
time ./venv/bin/python3 -m src.run \
101+
time ./venv/bin/python3 -m my_project.run \
100102
--input data/openimage_10.txt \
101103
--output beam-output/beam_test_out.txt \
102104
--tf_model_uri $(TF_MODEL_URI)
@@ -110,7 +112,7 @@ docker: ## Build a custom docker image and push it to Artifact Registry
110112
run-df-gpu: ## Run a Dataflow job using the custom container with GPUs
111113
$(eval JOB_NAME := beam-ml-starter-gpu-$(shell date +%s)-$(shell echo $$$$))
112114
ifeq ($(MODEL_ENV), "TORCH")
113-
time ./venv/bin/python3 -m src.run \
115+
time ./venv/bin/python3 -m my_project.run \
114116
--runner DataflowRunner \
115117
--job_name $(JOB_NAME) \
116118
--project $(PROJECT_ID) \
@@ -132,7 +134,7 @@ ifeq ($(MODEL_ENV), "TORCH")
132134
--model_state_dict_path $(MODEL_STATE_DICT_PATH) \
133135
--model_name $(MODEL_NAME)
134136
else
135-
time ./venv/bin/python3 -m src.run \
137+
time ./venv/bin/python3 -m my_project.run \
136138
--runner DataflowRunner \
137139
--job_name $(JOB_NAME) \
138140
--project $(PROJECT_ID) \
@@ -158,7 +160,7 @@ run-df-cpu: ## Run a Dataflow job with CPUs and without Custom Container
158160
@$(shell sed "s|\$${BEAM_VERSION}|$(BEAM_VERSION)|g" requirements.txt > beam-output/requirements.txt)
159161
@$(eval JOB_NAME := beam-ml-starter-cpu-$(shell date +%s)-$(shell echo $$$$))
160162
ifeq ($(MODEL_ENV), "TORCH")
161-
time ./venv/bin/python3 -m src.run \
163+
time ./venv/bin/python3 -m my_project.run \
162164
--runner DataflowRunner \
163165
--job_name $(JOB_NAME) \
164166
--project $(PROJECT_ID) \
@@ -174,7 +176,7 @@ ifeq ($(MODEL_ENV), "TORCH")
174176
--model_state_dict_path $(MODEL_STATE_DICT_PATH) \
175177
--model_name $(MODEL_NAME)
176178
else
177-
time ./venv/bin/python3 -m src.run \
179+
time ./venv/bin/python3 -m my_project.run \
178180
--runner DataflowRunner \
179181
--job_name $(JOB_NAME) \
180182
--project $(PROJECT_ID) \

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ newgrp docker
4141
├── requirements.prod.txt <- Packages for the production environment and produces `requirements.txt`
4242
├── scripts <- utility bash scripts
4343
├── setup.py <- Used in `python setup.py sdist` to create the multi-file python package
44-
├── src <- Source code for use in this project
45-
│   ├── __init__.py <- Makes src a Python module
44+
├── my_project <- Source code for use in this project, also your python package module name
45+
│   ├── __init__.py <- Makes my_project a Python package
4646
│   ├── config.py <- `pydantic` model classes to define sources, sinks, and models
4747
│   ├── pipeline.py <- Builds the Beam RunInference pipeline
4848
│   └── run.py <- A run module to parse the command options and run the Beam pipeline
@@ -118,10 +118,10 @@ The entire code flows in this way:
118118
* `run.py` is called by the`Makefile` targets to parse the input arguments and set `ModelConfig`, `SourceConfig`, and `SinkConfig` defined in `config.py`, then calls `build_pipeline` from `pipeline.py` to build the final Beam pipeline
119119

120120

121-
To customize the pipeline, modify `build_pipeline` in [pipeline.py](https://github.com/google/dataflow-ml-starter/blob/main/src/pipeline.py). It defines how to read the image data from TextIO, pre-process the images, score them, post-process the predictions,
121+
To customize the pipeline, modify `build_pipeline` in [pipeline.py](https://github.com/google/dataflow-ml-starter/blob/main/my_project/pipeline.py). It defines how to read the image data from TextIO, pre-process the images, score them, post-process the predictions,
122122
and at last save the results using TextIO.
123123

124-
[config.py](https://github.com/google/dataflow-ml-starter/blob/main/src/config.py) contains a set of `pydantic` models to specify the configurations for sources, sinks, and models and validate them. Users can easily add more Pytorch classification models. [Here](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/examples/inference) contains more examples.
124+
[config.py](https://github.com/google/dataflow-ml-starter/blob/main/my_project/config.py) contains a set of `pydantic` models to specify the configurations for sources, sinks, and models and validate them. Users can easily add more Pytorch classification models. [Here](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/examples/inference) contains more examples.
125125

126126
### `.env` Details
127127

@@ -286,7 +286,7 @@ More importantly, building the flex templates container from the custom SDK cont
286286

287287
Since the custom container is already created, it is straightforward to adapt Dataflow Flex Templates:
288288
1. create a [`metadata.json`](https://github.com/google/dataflow-ml-starter/blob/main/flex/metadata.json) file that contains the parameters required by your Beam pipeline. In this example, we can add `input`, `output`, `device`, `model_name`, `model_state_dict_path`, and `tf_model_uri` as the parameters that can be passed in by users. [Here](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates#example-metadata-file) is another example metadata file.
289-
2. convert the custom container to your template container following [this](https://cloud.google.com/dataflow/docs/guides/templates/configuring-flex-templates#use_custom_container_images). [`tensorflow_gpu.flex.Dockerfile`](https://github.com/google/dataflow-ml-starter/blob/main/tensorflow_gpu.flex.Dockerfile) is one example converted from `tensorflow_gpu.Dockerfile`. Only two parts are needed: switch to the Dataflow Template launcher entrypoint and package `src` into this container. Change `CUSTOM_CONTAINER_IMAGE` in `.env` and run `make docker` to create the custom container for Flex Templates.
289+
2. convert the custom container to your template container following [this](https://cloud.google.com/dataflow/docs/guides/templates/configuring-flex-templates#use_custom_container_images). [`tensorflow_gpu.flex.Dockerfile`](https://github.com/google/dataflow-ml-starter/blob/main/tensorflow_gpu.flex.Dockerfile) is one example converted from `tensorflow_gpu.Dockerfile`. Only two parts are needed: switch to the Dataflow Template launcher entrypoint and package `my_project` into this container. Change `CUSTOM_CONTAINER_IMAGE` in `.env` and run `make docker` to create the custom container for Flex Templates.
290290
3. `make create-flex-template` creates a template spec file in a Cloud Storage bucket defined by the env `TEMPLATE_FILE_GCS_PATH` that contains all of the necessary information to run the job, such as the SDK information and metadata. This calls the CLI `gcloud dataflow flex-template build`.
291291
4. `make run-df-gpu-flex` runs a Flex Template pipeline using the spec file from `TEMPLATE_FILE_GCS_PATH`. This calls the CLI `gcloud dataflow flex-template run`.
292292

File renamed without changes.
File renamed without changes.

src/pipeline.py renamed to my_project/pipeline.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,8 @@
3131
from PIL import Image
3232
from torchvision import models, transforms
3333

34-
try:
35-
from .config import ModelConfig, ModelName, SinkConfig, SourceConfig
36-
except ImportError:
37-
from config import ModelConfig, ModelName, SinkConfig, SourceConfig
34+
# Dataflow ML libraries
35+
from my_project.config import ModelConfig, ModelName, SinkConfig, SourceConfig
3836

3937
import tensorflow as tf # isort:skip
4038

src/run.py renamed to my_project/run.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,9 @@
2323
from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions
2424
from apache_beam.runners.runner import PipelineResult
2525

26-
try:
27-
from .config import ModelConfig, SinkConfig, SourceConfig
28-
from .pipeline import build_pipeline
29-
except ImportError:
30-
from config import ModelConfig, SinkConfig, SourceConfig
31-
from pipeline import build_pipeline
26+
# Dataflow ML libraries
27+
from my_project.config import ModelConfig, SinkConfig, SourceConfig
28+
from my_project.pipeline import build_pipeline
3229

3330

3431
def parse_known_args(argv):

scripts/check-pipeline.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ vm_ssh="gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PRO
4242
vm_scp="gcloud compute scp --strict-host-key-checking=no --project $PROJECT_ID --zone=$ZONE --quiet"
4343

4444
# Package the local code and copy it to VM
45-
PACKAGE_NAME="src-0.0.1"
45+
PACKAGE_NAME="my_project-0.0.1"
4646
python3 setup.py sdist
4747
$vm_ssh "sudo rm -fr ~/*"
4848
$vm_scp dist/$PACKAGE_NAME.tar.gz data/openimage_10.txt $VM_NAME:~/
@@ -54,13 +54,13 @@ echo "Running the PyTorch model on GPU..."
5454
$vm_ssh "docker run --entrypoint /bin/bash \
5555
--volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
5656
--volume /home/\$USER/:/workspace/\$USER --privileged $CUSTOM_CONTAINER_IMAGE -c \
57-
\"cd \$USER/$PACKAGE_NAME; python -m src.run --input openimage_10.txt --output beam-output/beam_test_out.txt --model_state_dict_path $MODEL_STATE_DICT_PATH --model_name $MODEL_NAME --device GPU\""
57+
\"cd \$USER/$PACKAGE_NAME; python -m my_project.run --input openimage_10.txt --output beam-output/beam_test_out.txt --model_state_dict_path $MODEL_STATE_DICT_PATH --model_name $MODEL_NAME --device GPU\""
5858
else
5959
echo "Running the Tensorflow model on GPU..."
6060
$vm_ssh "docker run --entrypoint /bin/bash \
6161
--volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
6262
--volume /home/\$USER/:/workspace/\$USER --privileged $CUSTOM_CONTAINER_IMAGE -c \
63-
\"cd \$USER/$PACKAGE_NAME; python -m src.run --input openimage_10.txt --output beam-output/beam_test_out.txt --tf_model_uri $TF_MODEL_URI --device GPU\""
63+
\"cd \$USER/$PACKAGE_NAME; python -m my_project.run --input openimage_10.txt --output beam-output/beam_test_out.txt --tf_model_uri $TF_MODEL_URI --device GPU\""
6464
fi
6565

6666
$vm_ssh "[ -f './$PACKAGE_NAME/beam-output/beam_test_out.txt' ] && echo 'The DirectRunner run succeeded on GPU!' || echo 'The DirectRunner run failed on GPU!'"

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
required = f.read().splitlines()
2525

2626
setuptools.setup(
27-
name="src",
27+
name="my_project",
2828
version="0.0.1",
2929
install_requires=required,
30-
packages=["src"],
30+
packages=["my_project"],
3131
)

tensorflow_gpu.flex.Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,16 @@ RUN \
4545
&& pip install --no-cache-dir torch==2.0.0+cu118 torchvision==0.15.1+cu118 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu118
4646

4747
# Copy the run module
48-
COPY src/ /workspace/src
49-
RUN rm -fr /workspace/src/__pycache__
48+
COPY my_project/ /workspace/my_project
49+
RUN rm -fr /workspace/my_project/__pycache__
5050

5151
#Specifies which Python file to run to launch the Flex Template.
52-
ENV FLEX_TEMPLATE_PYTHON_PY_FILE="src/run.py"
52+
ENV FLEX_TEMPLATE_PYTHON_PY_FILE="my_project/run.py"
5353

5454
# Since we already downloaded all the dependencies, there's no need to rebuild everything.
5555
ENV PIP_NO_DEPS=True
5656

57-
ENV PYTHONPATH "${PYTHONPATH}:/workspace/src/"
57+
ENV PYTHONPATH "${PYTHONPATH}:/workspace/my_project/"
5858

5959
# Copy the Dataflow Template launcher
6060
COPY --from=template_launcher /opt/google/dataflow/python_template_launcher /opt/google/dataflow/python_template_launcher

0 commit comments

Comments
 (0)