Nirnay/run.py at master · Ni8crawler18/Nirnay · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import asyncio
import os
import queue
import time

from fastapi import FastAPI, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from model import transcriber, claims as claim_module

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()

# Register CORS middleware with wildcard origins, methods and headers, and with
# credentials allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive -- confirm this is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Module-level queue. transcribe_background() puts the value returned by
# transcribe() on it; no consumer is visible in this portion of the file.
text_queue = queue.Queue()

class TranscribeRequest(BaseModel):
    """Request body accepted by the POST /transcribe endpoint."""
    # Forwarded to the transcriber as ``stream_url``.
    url: str
    # Forwarded to the transcriber as ``chunk_duration``.
    # NOTE(review): presumably seconds of audio per chunk -- confirm.
    chunk_duration: int = 30
    # Forwarded to the transcriber as ``model_size``.
    # NOTE(review): presumably a speech-model size name -- confirm valid values.
    model_size: str = "base"
    # Forwarded to the transcriber as ``total_duration``.
    # NOTE(review): presumably total seconds of stream to process -- confirm.
    duration: int = 120

@app.post("/transcribe")
async def transcribe_audio(request: TranscribeRequest, background_tasks: BackgroundTasks):
    """Schedule a background transcription and acknowledge right away.

    The job is registered on ``background_tasks`` with the request's settings;
    this handler does not wait for it to finish.

    Returns:
        A dict with a status ``"message"`` and the output ``"file"`` name.
    """
    results_path = "transcription_results.txt"
    # Positional order must match transcribe_background's parameter order.
    job_args = (
        request.url,
        request.chunk_duration,
        request.duration,
        request.model_size,
        results_path,
    )
    background_tasks.add_task(transcribe_background, *job_args)
    return {"message": "Transcription started", "file": results_path}

def transcribe_background(url, chunk_duration, duration, model_size, output_file):
    """Run one stream transcription and enqueue its result.

    Builds an ``M3U8StreamTranscriber`` from the arguments, calls its
    ``transcribe()`` method, and puts the returned value on ``text_queue``.
    """
    settings = {
        "stream_url": url,
        "chunk_duration": chunk_duration,
        "total_duration": duration,
        "model_size": model_size,
        "output_path": output_file,
    }
    worker = transcriber.M3U8StreamTranscriber(**settings)
    text_queue.put(worker.transcribe())

import time

@app.post("/extract_claims")
async def extract_claims():
    """Extract claims from the transcription file and return both texts.

    Waits 60 seconds to give a previously started transcription time to
    finish, then runs ``ClaimExtractor.process_transcription`` on
    ``transcription_results.txt``, writing to ``claims_results.txt``.

    Returns:
        On success, a dict with ``"message"``, the full ``"transcription"``
        text, and ``"claims"`` as a list of lines from the claims file.
        Otherwise ``{"error": "Extraction failed"}``.
    """
    transcription_file = "transcription_results.txt"
    claims_file = "claims_results.txt"

    # Wait 60 seconds to allow transcription to complete. This must be an
    # awaited sleep: time.sleep() inside a coroutine blocks the event loop,
    # stalling every other request for the whole minute.
    await asyncio.sleep(60)

    extractor = claim_module.ClaimExtractor()
    success = extractor.process_transcription(transcription_file, claims_file)

    if not success:
        return {"error": "Extraction failed"}

    with open(transcription_file, "r", encoding="utf-8") as tf:
        transcription_text = tf.read()

    with open(claims_file, "r", encoding="utf-8") as cf:
        claims_text = cf.read()

    return {
        "message": "Claims extracted",
        "transcription": transcription_text,
        "claims": claims_text.splitlines()  # split into list of claims
    }

@app.get("/get_transcription")
async def get_transcription():
    """Return the current contents of the transcription results file.

    Responds with an error payload when the file is missing or empty.
    """
    source_path = "transcription_results.txt"

    # Short-circuit keeps getsize() from running on a missing file.
    is_ready = os.path.exists(source_path) and os.path.getsize(source_path) != 0
    if is_ready:
        with open(source_path, "r", encoding="utf-8") as handle:
            contents = handle.read()
        return {
            "message": "Transcription fetched successfully",
            "transcription": contents
        }

    return {"error": "Transcription not available yet"}


@app.get("/")
def root():
    """Return a fixed status message for the root path."""
    status = {"message": "API is running"}
    return status

@app.get("/ping")
def ping():
    """Return a fixed "pong" reply."""
    reply = {"message": "pong"}
    return reply