Commit e83b505
Moved secrets into ZenML secrets
1 parent 78e94f2 commit e83b505

11 files changed, +168 -65 lines

llm-complete-guide/README.md

Lines changed: 6 additions & 11 deletions
@@ -49,8 +49,10 @@ Depending on your setup you may run into some issues when running the `pip install`
 In order to use the default LLM for this query, you'll need an account and an
 API key from OpenAI specified as another environment variable:
 
+zenml secret create supabase_postgres_db --password="YOUR_PASSWORD" --user="YOUR_USER" --host="YOUR_HOST" --port="YOUR_PORT"
+
 ```shell
-export OPENAI_API_KEY=<your-openai-api-key>
+zenml secret create openai --api_key=<your-openai-api-key>
 ```
 
 ### Setting up Supabase
@@ -66,22 +68,15 @@ You'll want to save the Supabase database password as a ZenML secret so that it
 isn't stored in plaintext. You can do this by running the following command:
 
 ```shell
-zenml secret create supabase_postgres_db --password="YOUR_PASSWORD"
+zenml secret create supabase_postgres_db --password="YOUR_PASSWORD" --user="YOUR_USER" --host="YOUR_HOST" --port="YOUR_PORT"
 ```
 
-You'll then want to connect to this database instance by getting the connection
+You can get the user, host and port for this database instance by getting the connection
 string from the Supabase dashboard.
 
 ![](.assets/supabase-connection-string.png)
 
-You can use these details to populate some environment variables where the
-pipeline code expects them:
-
-```shell
-export ZENML_POSTGRES_USER=<your-supabase-user>
-export ZENML_POSTGRES_HOST=<your-supabase-host>
-export ZENML_POSTGRES_PORT=<your-supabase-port>
-```
+Alternatively you can use a different database as the backend.
 
 ### Running the RAG pipeline
 
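The secrets created above are read back inside the pipeline code through the ZenML client rather than from plaintext environment variables. A minimal sketch of that lookup, assuming the secret names used in the README (`supabase_postgres_db` and `openai`) exist on the active ZenML server:

```python
# Minimal sketch: reading back the ZenML secrets created in the README.
# The Client().get_secret(...).secret_values pattern is the same one used in
# utils/llm_utils.py below; the secret names are the README's defaults.
from zenml.client import Client

client = Client()

db_secret = client.get_secret("supabase_postgres_db")
openai_secret = client.get_secret("openai")

connection_details = {
    "user": db_secret.secret_values["user"],
    "password": db_secret.secret_values["password"],
    "host": db_secret.secret_values["host"],
    "port": db_secret.secret_values["port"],
}
openai_api_key = openai_secret.secret_values["api_key"]
```
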
llm-complete-guide/configs/rag.yaml

Lines changed: 0 additions & 16 deletions
This file was deleted.
Lines changed: 30 additions & 0 deletions

# environment configuration
settings:
  docker:
    requirements:
      - unstructured
      - sentence-transformers>=3
      - pgvector
      - datasets
      - litellm
      - numpy
      - psycopg2-binary
      - tiktoken
      - ratelimit
    environment:
      ZENML_SUPABASE_SECRET_NAME: alexej_supabase_postgres_db
      ZENML_OPENAI_SECRET_NAME: alexej_openai
      ZENML_ENABLE_RICH_TRACEBACK: FALSE
      ZENML_LOGGING_VERBOSITY: INFO

steps:
  url_scraper:
    parameters:
      docs_url: https://docs.zenml.io
  generate_embeddings:
    step_operator: "terraform-gcp-6c0fd52233ca"
    settings:
      step_operator.vertex:
        accelerator_type: "NVIDIA_TESLA_P100"
        accelerator_count: 1
        machine_type: "n1-standard-8"
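
This config builds the Docker image with the listed requirements and tells the steps which ZenML secrets to read via the `ZENML_SUPABASE_SECRET_NAME` and `ZENML_OPENAI_SECRET_NAME` environment variables. The same intent can be expressed in code; a minimal sketch, assuming ZenML's `DockerSettings` with its `requirements` and `environment` fields and a placeholder pipeline name:

```python
# Minimal sketch: the settings section of the config above expressed as
# ZenML settings objects. Equivalent in intent only; not a file from the repo.
from zenml import pipeline
from zenml.config import DockerSettings

docker_settings = DockerSettings(
    requirements=[
        "unstructured",
        "sentence-transformers>=3",
        "pgvector",
        "datasets",
        "litellm",
        "numpy",
        "psycopg2-binary",
        "tiktoken",
        "ratelimit",
    ],
    environment={
        # names of the ZenML secrets the steps will read at runtime
        "ZENML_SUPABASE_SECRET_NAME": "alexej_supabase_postgres_db",
        "ZENML_OPENAI_SECRET_NAME": "alexej_openai",
    },
)


@pipeline(settings={"docker": docker_settings})
def rag_pipeline_sketch() -> None:  # placeholder pipeline
    ...
```
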
Lines changed: 30 additions & 0 deletions

# environment configuration
settings:
  docker:
    requirements:
      - unstructured
      - sentence-transformers>=3
      - pgvector
      - datasets
      - litellm
      - numpy
      - psycopg2-binary
      - tiktoken
      - ratelimit
    environment:
      ZENML_SUPABASE_SECRET_NAME: alexej_supabase_postgres_db
      ZENML_OPENAI_SECRET_NAME: alexej_openai
      ZENML_ENABLE_RICH_TRACEBACK: FALSE
      ZENML_LOGGING_VERBOSITY: INFO

steps:
  url_scraper:
    parameters:
      docs_url: https://docs.zenml.io/stack-components/orchestrators
  # generate_embeddings:
  #   step_operator: "terraform-gcp-6c0fd52233ca"
  #   settings:
  #     step_operator.vertex:
  #       accelerator_type: "NVIDIA_TESLA_P100"
  #       accelerator_count: 1
  #       machine_type: "n1-standard-8"
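
This variant scrapes only the orchestrators section of the docs and leaves the Vertex step operator commented out, so the pipeline can run locally. It is applied the same way `run.py` below applies its config; a minimal sketch, assuming the file lives at `configs/rag_local_dev.yaml` (the path `run.py` points to):

```python
# Minimal sketch: run the RAG pipeline with the local-dev config, following
# the with_options() pattern from run.py. The config path is an assumption
# based on run.py; adjust it if the file is named differently.
import os

from pipelines import llm_basic_rag

config_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "configs", "rag_local_dev.yaml"
)
llm_basic_rag.with_options(config_path=config_path)()
```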

llm-complete-guide/most_basic_eval.py

Lines changed: 4 additions & 2 deletions
@@ -20,6 +20,8 @@
 
 from openai import OpenAI
 
+from utils.openai_utils import get_openai_api_key
+
 
 def preprocess_text(text):
     text = text.lower()
@@ -51,7 +53,7 @@ def answer_question(query, corpus, top_n=2):
         return "I don't have enough information to answer the question."
 
     context = "\n".join(relevant_chunks)
-    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+    client = OpenAI(api_key=get_openai_api_key())
     chat_completion = client.chat.completions.create(
         messages=[
             {
@@ -117,7 +119,7 @@ def evaluate_retrieval(question, expected_answer, corpus, top_n=2):
 
 
 def evaluate_generation(question, expected_answer, generated_answer):
-    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+    client = OpenAI(api_key=get_openai_api_key())
     chat_completion = client.chat.completions.create(
         messages=[
             {
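
Both this file and `most_basic_rag_pipeline.py` below now fetch the key through `utils.openai_utils.get_openai_api_key`, a helper that is not part of this diff. A plausible sketch of such a helper, mirroring the environment-variable-then-secret fallback used in `utils/llm_utils.py` further down (this body is an assumption, not the project's actual implementation):

```python
# utils/openai_utils.py -- hypothetical sketch of the imported helper.
# Prefers the OPENAI_API_KEY environment variable and otherwise reads the
# key from the ZenML secret named by ZENML_OPENAI_SECRET_NAME.
import os


def get_openai_api_key() -> str:
    """Returns the OpenAI API key from the environment or a ZenML secret."""
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        from zenml.client import Client

        secret_name = os.getenv("ZENML_OPENAI_SECRET_NAME", "openai")
        api_key = Client().get_secret(secret_name).secret_values["api_key"]
    return api_key
```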

llm-complete-guide/most_basic_rag_pipeline.py

Lines changed: 3 additions & 1 deletion
@@ -21,6 +21,8 @@
 
 from openai import OpenAI
 
+from utils.openai_utils import get_openai_api_key
+
 
 def preprocess_text(text):
     text = text.lower()
@@ -52,7 +54,7 @@ def answer_question(query, corpus, top_n=2):
         return "I don't have enough information to answer the question."
 
     context = "\n".join(relevant_chunks)
-    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+    client = OpenAI(api_key=get_openai_api_key())
     chat_completion = client.chat.completions.create(
         messages=[
             {

llm-complete-guide/pipelines/llm_basic_rag.py

Lines changed: 6 additions & 2 deletions
@@ -22,11 +22,15 @@
 )
 from steps.url_scraper import url_scraper
 from steps.web_url_loader import web_url_loader
-from zenml import pipeline
+from zenml import pipeline, Model
+
+model_definition = Model(
+    name=""
+)
 
 
 @pipeline
-def llm_basic_rag() -> None:
+def llm_basic_rag(model=model_definition) -> None:
     """Executes the pipeline to train a basic RAG model.
 
     This function performs the following steps:
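
ZenML can also attach the `Model` directly on the decorator rather than passing it as a function default. A minimal sketch, assuming a ZenML release whose `@pipeline` decorator accepts a `model` argument and using a placeholder model name:

```python
# Minimal sketch: associate a ZenML Model with the pipeline via the decorator.
# The model name is a placeholder; assumes @pipeline accepts a `model` argument.
from zenml import Model, pipeline


@pipeline(model=Model(name="llm_basic_rag"))  # placeholder name
def llm_basic_rag_sketch() -> None:
    ...
```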

llm-complete-guide/run.py

Lines changed: 1 addition & 2 deletions
@@ -48,7 +48,6 @@
     llm_basic_rag,
     llm_eval,
 )
-from pipelines.finetune_embeddings_legacy import chunking_experiment
 from structures import Document
 from zenml.materializers.materializer_registry import materializer_registry
 
@@ -190,7 +189,7 @@ def main(
     print(f"Running Pipeline with pipeline args: {pipeline_args}")
     if rag:
         config_path = os.path.join(
-            os.path.dirname(os.path.realpath(__file__)), "configs", "rag.yaml"
+            os.path.dirname(os.path.realpath(__file__)), "configs", "rag_local_dev.yaml"
         )
         llm_basic_rag.with_options(config_path=config_path, **pipeline_args)()
     if evaluation:

llm-complete-guide/steps/distilabel_generate_queries.py

Lines changed: 3 additions & 1 deletion
@@ -27,6 +27,8 @@
 from distilabel.pipeline import Pipeline
 from zenml import step
 
+from utils.openai_utils import get_openai_api_key
+
 synthetic_generation_context = """
 The text is a chunk from technical documentation of ZenML.
 ZenML is an MLOps + LLMOps framework that makes your infrastructure and workflow metadata accessible to data science teams.
@@ -42,7 +44,7 @@ def generate_synthetic_queries(
     Annotated[Dataset, "test_with_queries"],
 ]:
     llm = OpenAILLM(
-        model=OPENAI_MODEL_GEN, api_key=os.getenv("OPENAI_API_KEY")
+        model=OPENAI_MODEL_GEN, api_key=get_openai_api_key()
     )
 
     with Pipeline(

llm-complete-guide/utils/llm_utils.py

Lines changed: 64 additions & 30 deletions
@@ -21,6 +21,8 @@
 
 import logging
 
+from zenml.cli import secret
+
 # Configure logging levels for specific modules
 logging.getLogger("pytorch").setLevel(logging.CRITICAL)
 logging.getLogger("sentence-transformers").setLevel(logging.CRITICAL)
@@ -212,48 +214,76 @@ def split_documents(
     return chunked_documents
 
 
-def get_local_db_connection_details() -> Dict[str, str]:
-    """Returns the connection details for the local database.
+def get_db_password(secret_name: str) -> str:
+    """Returns the password for the PostgreSQL database.
 
     Returns:
-        dict: A dictionary containing the connection details for the local
-            database.
+        str: The password for the PostgreSQL database.
+    """
+    password = os.getenv("ZENML_POSTGRES_DB_PASSWORD")
+    if not password:
+        from zenml.client import Client
 
-    Raises:
-        RuntimeError: If the environment variables ZENML_POSTGRES_USER, ZENML_POSTGRES_HOST, or ZENML_POSTGRES_PORT are not set.
+        password = (
+            Client()
+            .get_secret(secret_name)
+            .secret_values["password"]
+        )
+    return password
+
+
+def get_db_user(secret_name: str) -> str:
+    """Returns the user for the PostgreSQL database.
+
+    Returns:
+        str: The user for the PostgreSQL database.
     """
     user = os.getenv("ZENML_POSTGRES_USER")
-    host = os.getenv("ZENML_POSTGRES_HOST")
-    port = os.getenv("ZENML_POSTGRES_PORT")
+    if not user:
+        from zenml.client import Client
 
-    if not user or not host or not port:
-        raise RuntimeError(
-            "Please make sure to set the environment variables: ZENML_POSTGRES_USER, ZENML_POSTGRES_HOST, and ZENML_POSTGRES_PORT"
+        user = (
+            Client()
+            .get_secret(secret_name)
+            .secret_values["user"]
         )
+    return user
 
-    return {
-        "user": user,
-        "host": host,
-        "port": port,
-    }
 
+def get_db_host(secret_name: str) -> str:
+    """Returns the host for the PostgreSQL database.
 
-def get_db_password() -> str:
-    """Returns the password for the PostgreSQL database.
+    Returns:
+        str: The host for the PostgreSQL database.
+    """
+    host = os.getenv("ZENML_POSTGRES_HOST")
+    if not host:
+        from zenml.client import Client
+
+        host = (
+            Client()
+            .get_secret(secret_name)
+            .secret_values["host"]
+        )
+    return host
+
+
+def get_db_port(secret_name: str) -> str:
+    """Returns the port for the PostgreSQL database.
 
     Returns:
-        str: The password for the PostgreSQL database.
+        str: The port for the PostgreSQL database.
     """
-    password = os.getenv("ZENML_POSTGRES_DB_PASSWORD")
-    if not password:
+    port = os.getenv("ZENML_POSTGRES_PORT")
+    if not port:
         from zenml.client import Client
 
-        password = (
+        port = (
             Client()
-            .get_secret("supabase_postgres_db")
-            .secret_values["password"]
+            .get_secret(secret_name)
+            .secret_values["port"]
        )
-    return password
+    return port
 
 
 def get_db_conn() -> connection:
@@ -265,15 +295,19 @@ def get_db_conn() -> connection:
     Returns:
         connection: A psycopg2 connection object to the PostgreSQL database.
     """
-    pg_password = get_db_password()
+    secret_name = os.getenv("ZENML_SUPABASE_SECRET_NAME")
 
-    local_database_connection = get_local_db_connection_details()
+    if not secret_name:
+        raise RuntimeError(
+            "Please make sure to set the environment variable: ZENML_SUPABASE_SECRET_NAME to point at the secret that "
+            "contains your supabase connection details."
+        )
 
     CONNECTION_DETAILS = {
-        "user": local_database_connection["user"],
-        "password": pg_password,
-        "host": local_database_connection["host"],
-        "port": local_database_connection["port"],
+        "user": get_db_user(secret_name),
+        "password": get_db_password(secret_name),
+        "host": get_db_host(secret_name),
+        "port": get_db_port(secret_name),
         "dbname": "postgres",
     }
 
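
With these helpers in place, a step only needs the `ZENML_SUPABASE_SECRET_NAME` environment variable (set in the configs above) to open a connection. A minimal usage sketch, assuming the `supabase_postgres_db` secret from the README exists and `utils/llm_utils.py` is importable from the project root:

```python
# Minimal usage sketch: point get_db_conn() at the ZenML secret that holds
# the Supabase connection details, then run a trivial query.
import os

from utils.llm_utils import get_db_conn

# name of the secret created in the README
os.environ["ZENML_SUPABASE_SECRET_NAME"] = "supabase_postgres_db"

conn = get_db_conn()
try:
    with conn.cursor() as cur:
        cur.execute("SELECT 1")
        print(cur.fetchone())
finally:
    conn.close()
```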
