codefortulsa
diff --git a/‎.github/workflows/python-app.yml‎
Lines changed: 43 additions & 0 deletions b/‎.github/workflows/python-app.yml‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎Dockerfile‎
Lines changed: 26 additions & 0 deletions b/‎Dockerfile‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎notebooks/experiments/minutes_diarization/test_data/25-153-1_25-153-1 2025-02-12 4PM Minutes.pdf‎
109 KB b/‎notebooks/experiments/minutes_diarization/test_data/25-153-1_25-153-1 2025-02-12 4PM Minutes.pdf‎
109 KB
diff --git a/‎notebooks/experiments/minutes_diarization/test_data/25-153-2_25-153-2 2025-02-12 5PM Minutes.pdf‎
182 KB b/‎notebooks/experiments/minutes_diarization/test_data/25-153-2_25-153-2 2025-02-12 5PM Minutes.pdf‎
182 KB
diff --git a/‎notebooks/experiments/minutes_diarization/test_data/25-173-1_25-173-1 2025-02-26 4PM Minutes.pdf‎
87.8 KB b/‎notebooks/experiments/minutes_diarization/test_data/25-173-1_25-173-1 2025-02-26 4PM Minutes.pdf‎
87.8 KB
diff --git a/‎notebooks/experiments/minutes_diarization/test_data/25-173-2_25-173-2 2025-02-26 5PM Minutes.pdf‎
209 KB b/‎notebooks/experiments/minutes_diarization/test_data/25-173-2_25-173-2 2025-02-26 5PM Minutes.pdf‎
209 KB
diff --git a/‎notebooks/videos.ipynb‎
Lines changed: 14 additions & 16 deletions b/‎notebooks/videos.ipynb‎
Lines changed: 14 additions & 16 deletions
diff --git a/‎push_image.sh‎
Lines changed: 11 additions & 0 deletions b/‎push_image.sh‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,53 @@
+# CI for tgov: installs Poetry into a dedicated venv, installs the project,
+# lints with flake8, then runs src/run_diarization.py.
+name: Build/run tgov
+
+on:
+  push:
+    branches: [ "main", "deploy-lambda" ]
+  pull_request:
+    branches: [ "main" ]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    env:
+      # Same version as the POETRY_VERSION build ARG in the Dockerfile.
+      POETRY_VERSION: "1.3.2"
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.11
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.11"
+    - name: Install dependencies
+      run: |
+        # Hosted runners execute steps as a non-root user, so apt needs sudo.
+        sudo apt-get update
+        sudo apt-get install --no-install-suggests --no-install-recommends --yes python3-venv gcc libpython3-dev
+        # Keep the Poetry venv outside the checkout so `flake8 .` does not lint it.
+        POETRY_VENV="${RUNNER_TEMP}/poetry-venv"
+        python3 -m venv "${POETRY_VENV}"
+        "${POETRY_VENV}/bin/pip" install -U pip setuptools
+        "${POETRY_VENV}/bin/pip" install "poetry==${POETRY_VERSION}" flake8
+        # A shell `export` only lasts for this step; GITHUB_ENV and GITHUB_PATH
+        # carry the values into the steps that follow.
+        echo "POETRY_VENV=${POETRY_VENV}" >> "${GITHUB_ENV}"
+        echo "${POETRY_VENV}/bin" >> "${GITHUB_PATH}"
+        "${POETRY_VENV}/bin/poetry" config virtualenvs.create false
+        "${POETRY_VENV}/bin/poetry" install
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Run Diarization
+      run: |
+        python src/run_diarization.py
@@ -0,0 +1,35 @@
+# Image that runs src/run_diarization.py with dependencies installed by Poetry.
+FROM python:3.12-slim AS build
+ARG POETRY_VERSION=1.3.2
+ENV POETRY_VENV=/opt/poetry-venv
+
+# Build tooling plus an isolated venv that holds Poetry itself.
+RUN apt-get update && \
+    apt-get install --no-install-suggests --no-install-recommends --yes python3-venv gcc libpython3-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    python3 -m venv "${POETRY_VENV}" && \
+    "${POETRY_VENV}/bin/pip" install -U pip setuptools && \
+    "${POETRY_VENV}/bin/pip" install "poetry==${POETRY_VERSION}"
+
+ENV PATH="${PATH}:${POETRY_VENV}/bin"
+WORKDIR /app
+
+# Install third-party dependencies from the manifests alone so this layer is
+# reused from cache when only application code changes.
+COPY poetry.lock pyproject.toml ./
+RUN poetry config virtualenvs.create false && \
+    poetry install --no-interaction --no-ansi --no-root
+
+COPY src /app/src
+COPY db /app/db
+COPY README.md /app/README.md
+
+# Second pass installs the project itself now that its sources are present.
+RUN poetry install --no-interaction --no-ansi
+
+ENV PYTHONPATH=/app
+# Prepend the venv so `python` in CMD resolves to the venv interpreter.
+ENV PATH="${POETRY_VENV}/bin:${PATH}"
+
+CMD ["python", "src/run_diarization.py"]
@@ -115,6 +115,8 @@
     "# Create output directory if it doesn't exist\n",
     "VIDEO_DIRECTORY = Path(\"../data/video\")\n",
     "VIDEO_DIRECTORY.mkdir(parents=True, exist_ok=True)\n",
+    "print(file_name)\n",
+    "print(video_url)\n",
     "\n",
     "# Define output path for the video\n",
     "output_path = VIDEO_DIRECTORY / f\"{file_name}.mp4\"\n",
@@ -181,26 +183,15 @@
    "metadata": {},
    "source": [
     "### Convert the video file into a transcipt\n",
-    "This step requires a huggingface login and api_token.\n",
-    "You will also need to agree to terms on each of the following models:\n",
-    "- guillaumekln/faster-whisper\n",
-    "- \n"
+    "This step requires a huggingface login and api_token (?)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "from src.videos import transcribe_video\n",
-    "\n",
-    "video_file = \"../data/video/regular_council_meeting___2025_02_26.mp4\"\n",
-    "\n",
-    "transcription_dir = Path(\"../data/transcripts\")\n",
-    "\n",
-    "transcription = await transcribe_video(video_file, transcription_dir)"
-   ]
+   "source": []
   },
   {
    "cell_type": "code",
@@ -216,13 +207,20 @@
     "\n",
     "transcription = await transcribe_video_with_diarization(video_file, transcription_dir)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "TGOV Scraper",
+   "display_name": ".venv",
    "language": "python",
-   "name": "tgov-scraper"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -234,7 +232,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,
 
@@ -0,0 +1,11 @@
+# !/bin/bash
+
+set -ex
+
+aws ecr get-login-password --region us-east-2 --profile kendall | docker login --username AWS --password-stdin 340531845404.dkr.ecr.us-east-2.amazonaws.com
+docker buildx build --platform linux/amd64 -t tgov_linux . --provenance=false
+export tag=$(date +%s)
+docker tag tgov_linux 340531845404.dkr.ecr.us-east-2.amazonaws.com/tgov:$tag
+docker tag tgov_linux 340531845404.dkr.ecr.us-east-2.amazonaws.com/tgov:latest
+docker push 340531845404.dkr.ecr.us-east-2.amazonaws.com/tgov:$tag
+docker push 340531845404.dkr.ecr.us-east-2.amazonaws.com/tgov:latest
@@ -25,11 +25,11 @@ jupyter = "^1.1.1"
 jupyter-nbextensions-configurator = "^0.6.4"
 python-dotenv = "^1.0.1"
 aiofiles = "^24.1.0"
-faster-whisper = "^1.1.1"
 prefect = "^3.3.0"
 boto3 = "^1.37.24"
 dyntastic = "^0.18.0"
 dateparser = "^1.2.1"
+whisperx = "^3.3.4"
 
 
 [tool.poetry.group.dev.dependencies]