Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Please provide a short summary explaining the motivation behind these changes.

# Checklist
- [ ] I have read the [contributing guidelines](CONTRIBUTING.md).
- [ ] I have read the [contributing guidelines](../CONTRIBUTING.md).
- [ ] I have run the necessary tests and linters.
- [ ] I have updated relevant documentation where applicable.

Expand Down
8 changes: 5 additions & 3 deletions end-to-end-computer-vision/utils/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
logger = get_logger(__name__)


def load_images_from_folder(folder):
images = []
def load_images_from_folder(folder: str) -> List[Image.Image]:
images: List[Image.Image] = []
for filename in os.listdir(folder):
if (
filename.endswith(".png")
Expand All @@ -45,7 +45,9 @@ def load_images_from_folder(folder):
return images


def load_images_from_source(data_source, download_dir, filenames):
def load_images_from_source(
data_source: str, download_dir: str, filenames: List[str]
) -> None:
total_images = len(filenames)
for index, filename in enumerate(filenames):
src_path = f"{data_source}/{filename}.png"
Expand Down
11 changes: 7 additions & 4 deletions llm-complete-guide/most_basic_rag_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,26 @@

import re
import string
from typing import List

from openai import OpenAI
from utils.openai_utils import get_openai_api_key


def preprocess_text(text):
def preprocess_text(text: str) -> str:
    """Normalize text so that it can be compared token by token.

    The input is lowercased, every character listed in
    ``string.punctuation`` is deleted, each run of whitespace is collapsed
    to a single space, and leading/trailing whitespace is stripped.

    Args:
        text: The raw text to normalize.

    Returns:
        The normalized text; an empty string if nothing but punctuation
        and whitespace was supplied.
    """
    text = text.lower()
    # Punctuation is deleted (not replaced by a space), so "don't" -> "dont".
    text = text.translate(str.maketrans("", "", string.punctuation))
    # Collapse whitespace after deletion so removed punctuation cannot
    # leave double spaces behind.
    text = re.sub(r"\s+", " ", text).strip()
    return text


def tokenize(text):
def tokenize(text: str) -> List[str]:
    """Split text into normalized, whitespace-separated tokens.

    Args:
        text: The raw text to tokenize.

    Returns:
        The tokens of ``preprocess_text(text)``, split on whitespace; an
        empty list when the normalized text is empty.
    """
    return preprocess_text(text).split()


def retrieve_relevant_chunks(query, corpus, top_n=2):
def retrieve_relevant_chunks(
query: str, corpus: List[str], top_n: int = 2
) -> List[str]:
query_tokens = set(tokenize(query))
similarities = []
for chunk in corpus:
Expand All @@ -46,7 +49,7 @@ def retrieve_relevant_chunks(query, corpus, top_n=2):
return [chunk for chunk, _ in similarities[:top_n]]


def answer_question(query, corpus, top_n=2):
def answer_question(query: str, corpus: List[str], top_n: int = 2) -> str:
relevant_chunks = retrieve_relevant_chunks(query, corpus, top_n)
if not relevant_chunks:
return "I don't have enough information to answer the question."
Expand Down
8 changes: 7 additions & 1 deletion nightwatch-ai/src/pipelines/supabase_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,19 @@
# permissions and limitations under the License.


from typing import Any, Callable

from zenml.pipelines import pipeline

pipeline_name = "daily_supabase_summary"


@pipeline(name=pipeline_name)
def daily_supabase_summary(get_latest_data, generate_summary, report_summary):
def daily_supabase_summary(
get_latest_data: Callable[[], Any],
generate_summary: Callable[[Any], Any],
report_summary: Callable[[Any], Any],
) -> None:
"""Generates a summary of the latest data.

Args:
Expand Down
2 changes: 1 addition & 1 deletion nightwatch-ai/src/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from zenml.client import Client


def main():
def main() -> None:
if Client().active_stack.alerter is None:
# we use a print alerter
alerter = print_alerter()
Expand Down
20 changes: 18 additions & 2 deletions zencoder/steps/deployment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Optional, cast
from typing import Dict, Optional, Tuple, cast

from zenml import get_step_context, step
from zenml.client import Client
Expand All @@ -14,7 +14,23 @@
logger = get_logger(__name__)


def parse_huggingface_url(url):
def parse_huggingface_url(url: str) -> Tuple[str, str, str]:
"""
Parses a Hugging Face Hub URL to extract the namespace, repository, and revision.

Args:
url: The Hugging Face Hub URL to parse. Expected format:
"https://huggingface.co/{namespace}/{repository}/tree/{revision}".

Returns:
A tuple containing:
- namespace: The owner or organization of the repository.
- repository: The name of the repository.
- revision: The specific commit hash or branch name.

Raises:
ValueError: If the URL does not match the expected format.
"""
# Split the URL into parts
parts = url.split("/")

Expand Down
22 changes: 11 additions & 11 deletions zencoder/steps/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import functools
import os
import random
from typing import Optional, Tuple
from typing import List, Optional, Tuple

import numpy as np
import torch
Expand Down Expand Up @@ -66,16 +66,16 @@ def get_fim_token_ids(tokenizer):

## Adapted from https://github.com/bigcode-project/Megatron-LM/blob/6c4bf908df8fd86b4977f54bf5b8bd4b521003d1/megatron/data/gpt_dataset.py
def permute(
sample,
np_rng,
suffix_tok_id,
prefix_tok_id,
middle_tok_id,
pad_tok_id,
fim_rate=0.5,
fim_spm_rate=0.5,
truncate_or_pad=False,
):
sample: List[int],
np_rng: np.random.RandomState,
suffix_tok_id: Optional[int],
prefix_tok_id: Optional[int],
middle_tok_id: Optional[int],
pad_tok_id: Optional[int],
fim_rate: float = 0.5,
fim_spm_rate: float = 0.5,
truncate_or_pad: bool = False,
) -> Tuple[List[int], np.random.RandomState]:
"""
Take in a sample (list of tokens) and perform a FIM transformation on it with a probability of fim_rate, using two FIM modes:
PSM and SPM (with a probability of fim_spm_rate).
Expand Down