Merged (changes from 1 commit)
8 changes: 5 additions & 3 deletions end-to-end-computer-vision/utils/dataset_utils.py

@@ -31,8 +31,8 @@
 logger = get_logger(__name__)


-def load_images_from_folder(folder):
-    images = []
+def load_images_from_folder(folder: str) -> List[Image.Image]:
+    images: List[Image.Image] = []
     for filename in os.listdir(folder):
         if (
             filename.endswith(".png")
@@ -45,7 +45,9 @@ def load_images_from_folder(folder):
     return images


-def load_images_from_source(data_source, download_dir, filenames):
+def load_images_from_source(
+    data_source: str, download_dir: str, filenames: List[str]
+) -> None:
     total_images = len(filenames)
     for index, filename in enumerate(filenames):
         src_path = f"{data_source}/{filename}.png"
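For orientation, a self-contained version of the annotated helper might look like the sketch below. Only the signature, the PNG check, and the return are visible in this diff; the `.jpg` branch, the `PIL.Image` import, and the loop body are assumptions.

```python
import os
from typing import List

from PIL import Image


def load_images_from_folder(folder: str) -> List[Image.Image]:
    """Collect every PNG (and, assumed here, JPG) in a folder as PIL images."""
    images: List[Image.Image] = []
    for filename in os.listdir(folder):
        if (
            filename.endswith(".png")
            or filename.endswith(".jpg")  # assumed extra extension
        ):
            # Image.open is lazy, so copy() forces the pixels to be read
            # before the file handle is closed.
            with Image.open(os.path.join(folder, filename)) as img:
                images.append(img.copy())
    return images
```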
11 changes: 7 additions & 4 deletions llm-complete-guide/most_basic_rag_pipeline.py

@@ -17,23 +17,26 @@
 import re
 import string
+from typing import List

 from openai import OpenAI
 from utils.openai_utils import get_openai_api_key


-def preprocess_text(text):
+def preprocess_text(text: str) -> str:
     text = text.lower()
     text = text.translate(str.maketrans("", "", string.punctuation))
     text = re.sub(r"\s+", " ", text).strip()
     return text


-def tokenize(text):
+def tokenize(text: str) -> List[str]:
     return preprocess_text(text).split()


-def retrieve_relevant_chunks(query, corpus, top_n=2):
+def retrieve_relevant_chunks(
+    query: str, corpus: List[str], top_n: int = 2
+) -> List[str]:
     query_tokens = set(tokenize(query))
     similarities = []
     for chunk in corpus:
@@ -46,7 +49,7 @@ def retrieve_relevant_chunks(query, corpus, top_n=2):
     return [chunk for chunk, _ in similarities[:top_n]]


-def answer_question(query, corpus, top_n=2):
+def answer_question(query: str, corpus: List[str], top_n: int = 2) -> str:
     relevant_chunks = retrieve_relevant_chunks(query, corpus, top_n)
     if not relevant_chunks:
         return "I don't have enough information to answer the question."
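Together the annotated functions form a minimal keyword-overlap retriever. A usage sketch follows; the corpus strings are made up, and the scoring inside `retrieve_relevant_chunks` (elided in the diff) is assumed to rank chunks by token overlap with the query.

```python
corpus = [
    "ZenML pipelines are built from steps.",
    "Steps can be cached to speed up repeated runs.",
    "The alerter stack component posts notifications.",
]

# preprocess_text() lower-cases, strips punctuation, and collapses
# whitespace, so tokenize() yields clean lower-case tokens.
assert tokenize("What is a ZenML pipeline?") == [
    "what", "is", "a", "zenml", "pipeline"
]

# Rank chunks by overlap with the query tokens and keep the top one.
print(retrieve_relevant_chunks("pipeline steps", corpus, top_n=1))
```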
8 changes: 7 additions & 1 deletion nightwatch-ai/src/pipelines/supabase_summary.py

@@ -13,13 +13,19 @@
 # permissions and limitations under the License.


+from typing import Any, Callable
+
 from zenml.pipelines import pipeline

 pipeline_name = "daily_supabase_summary"


 @pipeline(name=pipeline_name)
-def daily_supabase_summary(get_latest_data, generate_summary, report_summary):
+def daily_supabase_summary(
+    get_latest_data: Callable[[], Any],
+    generate_summary: Callable[[Any], Any],
+    report_summary: Callable[[Any], Any],
+) -> None:
     """Generates a summary of the latest data.

     Args:
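The `Callable` annotations make the pipeline's contract explicit: each argument is a step the pipeline wires together. A wiring sketch, assuming the legacy ZenML steps API that matches the `zenml.pipelines` import above (the step bodies are placeholders, not the real nightwatch-ai steps):

```python
from typing import Any, Dict, List

from zenml.steps import step


@step
def get_latest_data() -> List[Dict[str, Any]]:
    # Placeholder: the real step queries Supabase for the newest rows.
    return [{"id": 1, "event": "signup"}]


@step
def generate_summary(data: List[Dict[str, Any]]) -> str:
    # Placeholder: the real step asks an LLM to summarize the rows.
    return f"{len(data)} new event(s) today."


@step
def report_summary(summary: str) -> None:
    print(summary)


# With the legacy API, the pipeline is instantiated with step
# instances and then run explicitly.
daily_supabase_summary(
    get_latest_data=get_latest_data(),
    generate_summary=generate_summary(),
    report_summary=report_summary(),
).run()
```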
2 changes: 1 addition & 1 deletion nightwatch-ai/src/run.py

@@ -21,7 +21,7 @@
 from zenml.client import Client


-def main():
+def main() -> None:
     if Client().active_stack.alerter is None:
         # we use a print alerter
         alerter = print_alerter()
4 changes: 2 additions & 2 deletions zencoder/steps/deployment.py

@@ -1,4 +1,4 @@
-from typing import Dict, Optional, cast
+from typing import Dict, Optional, Tuple, cast

 from zenml import get_step_context, step
 from zenml.client import Client
@@ -14,7 +14,7 @@
 logger = get_logger(__name__)


-def parse_huggingface_url(url):
+def parse_huggingface_url(url: str) -> Tuple[str, str, str]:
     # Split the URL into parts
     parts = url.split("/")

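The new return annotation promises exactly three strings. A parser consistent with that signature might look like the sketch below; only the comment and the `split("/")` line are visible in the diff, so the URL layout, the error handling, and the returned fields are assumptions.

```python
from typing import Tuple


def parse_huggingface_url(url: str) -> Tuple[str, str, str]:
    # Split the URL into parts
    parts = url.split("/")
    # Assumed layout: https://huggingface.co/{namespace}/{repo}/blob/{revision}/...
    if len(parts) < 5:
        raise ValueError(f"Invalid Hugging Face URL: {url}")
    namespace, repository = parts[3], parts[4]
    revision = parts[6] if len(parts) > 6 else "main"
    return namespace, repository, revision


# Under these assumptions:
# parse_huggingface_url("https://huggingface.co/org/model/blob/main/handler.py")
# -> ("org", "model", "main")
```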
22 changes: 11 additions & 11 deletions zencoder/steps/trainer.py

@@ -8,7 +8,7 @@
 import functools
 import os
 import random
-from typing import Optional, Tuple
+from typing import List, Optional, Tuple

 import numpy as np
 import torch
@@ -66,16 +66,16 @@ def get_fim_token_ids(tokenizer):

 ## Adapted from https://github.com/bigcode-project/Megatron-LM/blob/6c4bf908df8fd86b4977f54bf5b8bd4b521003d1/megatron/data/gpt_dataset.py
 def permute(
-    sample,
-    np_rng,
-    suffix_tok_id,
-    prefix_tok_id,
-    middle_tok_id,
-    pad_tok_id,
-    fim_rate=0.5,
-    fim_spm_rate=0.5,
-    truncate_or_pad=False,
-):
+    sample: List[int],
+    np_rng: np.random.RandomState,
+    suffix_tok_id: Optional[int],
+    prefix_tok_id: Optional[int],
+    middle_tok_id: Optional[int],
+    pad_tok_id: Optional[int],
+    fim_rate: float = 0.5,
+    fim_spm_rate: float = 0.5,
+    truncate_or_pad: bool = False,
+) -> Tuple[List[int], np.random.RandomState]:
     """
     Take in a sample (list of tokens) and perform a FIM transformation on it with a probability of fim_rate, using two FIM modes:
     PSM and SPM (with a probability of fim_spm_rate).
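For readers new to fill-in-the-middle (FIM) training, the docstring's two modes reorder a prefix/middle/suffix split of the token list. A stripped-down sketch of the idea (illustrative only; the real `permute` above also handles `pad_tok_id`, `truncate_or_pad`, and returns the RNG alongside the sample):

```python
from typing import List

import numpy as np


def fim_sketch(
    sample: List[int],
    np_rng: np.random.RandomState,
    prefix_tok_id: int,
    middle_tok_id: int,
    suffix_tok_id: int,
    fim_rate: float = 0.5,
    fim_spm_rate: float = 0.5,
) -> List[int]:
    if np_rng.binomial(1, fim_rate) == 0:
        return sample  # left untouched with probability 1 - fim_rate
    # Split the sample into prefix / middle / suffix at two random points.
    lo, hi = sorted(np_rng.randint(0, len(sample) + 1, size=2))
    prefix, middle, suffix = sample[:lo], sample[lo:hi], sample[hi:]
    if np_rng.binomial(1, fim_spm_rate):
        # SPM: the suffix comes first, then prefix and middle.
        return [prefix_tok_id, suffix_tok_id] + suffix + [middle_tok_id] + prefix + middle
    # PSM: prefix, then suffix, then the middle the model must fill in.
    return [prefix_tok_id] + prefix + [suffix_tok_id] + suffix + [middle_tok_id] + middle
```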