Niklas Mueller 2024-07-23 18:23:54 +02:00
commit 7325f650b6
6 changed files with 109 additions and 0 deletions

2 .gitignore vendored Normal file

@@ -0,0 +1,2 @@
.vscode
.DS_Store

22 Dockerfile Normal file

@@ -0,0 +1,22 @@
FROM python:3.11-slim
# set the working directory and create the mount points for input files and transcripts
WORKDIR /app
RUN mkdir /app/input_files /app/transcripts
# ffmpeg is required by pydub and whisper to decode audio/video files
RUN apt-get update && apt-get install -y ffmpeg && rm -rf /var/lib/apt/lists/*
# install dependencies
COPY ./requirements.txt /app
RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --no-cache-dir --upgrade -r requirements.txt
# copy the model into Whisper's default cache directory, so load_model() finds it without downloading
COPY ./large-v3.pt /root/.cache/whisper/large-v3.pt
# copy the scripts to the /app folder
COPY ./init.sh /app
COPY ./runner.py /app
CMD ["bash", "init.sh"]

18 README.md Normal file

@@ -0,0 +1,18 @@
# STT-Function
With the Speech-to-Text (STT) Function you can transcribe a file, i.e. convert an audio or video file into text.
Internally, [Whisper](https://github.com/openai/whisper) from OpenAI is used to transcribe the audio file.
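For reference, a minimal sketch of what happens inside the container (model name, language, and file name are illustrative; the real values come from the `WHISPER_MODEL` and `LANGUAGE_CODE` environment variables, see `runner.py`):
```python
import whisper

# load a Whisper model and transcribe a single file
model = whisper.load_model("tiny")                     # WHISPER_MODEL
result = model.transcribe("talk.mp3", language="de")   # LANGUAGE_CODE
print(result["text"])
```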
## Structure
* The container has two folders attached: the input folder containing the files to be transcribed, and the output folder where the transcripts are saved. For example, an input file mounted as `/app/input_files/talk.mp3` (file name illustrative) results in a transcript at `/app/transcripts/talk_transcript_<LANGUAGE_CODE>_.txt` (naming per `runner.py`).
## Setup
Make sure [Podman](https://podman.io/docs/installation) or [Docker](https://docs.docker.com/get-docker/) is installed.
Download the model into the folder where you will build the container image: use the [download link](https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt) or run `wget https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt`.
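Optionally, verify the download: the long hex segment in the URL is the file's SHA-256 checksum (Whisper's model URLs embed it), e.g. with this small Python snippet:
```python
# verify the downloaded model against the SHA-256 checksum embedded in the download URL
import hashlib

expected = "e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb"
sha256 = hashlib.sha256()
with open("large-v3.pt", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        sha256.update(chunk)
assert sha256.hexdigest() == expected, "checksum mismatch - re-download the model"
```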
```bash
podman build -t stt-function .
podman run -e LANGUAGE_CODE='de' -e WHISPER_MODEL='tiny' -v '/path/to/audio_video/file/':/app/input_files/ -v /output_path/of/transcript/:/app/transcripts/ --name stt-function_container --rm -t stt-function
```
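Set `LANGUAGE_CODE='multi'` to let Whisper auto-detect the language (see `runner.py`). `WHISPER_MODEL` accepts any Whisper model name, e.g. `tiny`, `base`, `small`, `medium`, or `large-v3`; note that only `large-v3` is baked into the image, other models are downloaded by Whisper on first use.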

4 init.sh Executable file

@@ -0,0 +1,4 @@
#!/bin/bash
# persist the container's environment variables (LANGUAGE_CODE, WHISPER_MODEL) system-wide
env >> /etc/environment
# run the transcription script
/usr/local/bin/python /app/runner.py

3 requirements.txt Normal file

@@ -0,0 +1,3 @@
openai-whisper
pydub
# note: the ffmpeg binary is installed via apt in the Dockerfile; the PyPI ffmpeg package is unrelated and not needed

60 runner.py Normal file

@@ -0,0 +1,60 @@
import whisper
import os
import sys
import logging
import datetime as dt
import traceback
from pydub import AudioSegment

# language code and model name are passed in via `podman run -e ...`
env_var_language_code = os.environ['LANGUAGE_CODE']
env_var_whisper_model = os.environ['WHISPER_MODEL']

# Setup Logging: write to a dated log file and to stdout
logging.basicConfig(
    level=logging.DEBUG,
    # level=logging.INFO,
    format="Start: " + str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("/var/log/" + dt.datetime.today().strftime('%Y-%m-%d') + "_-_cron.log"),
        logging.StreamHandler(sys.stdout)
    ]
)

def get_audio_duration(file_path):
    """Return the duration of an audio/video file in seconds."""
    audio = AudioSegment.from_file(file_path)
    duration_seconds = len(audio) / 1000
    return duration_seconds

try:
    # load the Whisper model once instead of reloading it for every file
    model = whisper.load_model(env_var_whisper_model)
    for root, dirs, files in os.walk('/app/input_files'):
        for file in files:
            try:
                file_path = os.path.join(root, file)
                logging.debug("#" * 32)
                logging.debug(file_path)
                duration = get_audio_duration(file_path)
                logging.debug("Duration: " + str(duration) + " seconds")
                if env_var_language_code == "multi":
                    # let Whisper auto-detect the language
                    result = model.transcribe(file_path)
                else:
                    result = model.transcribe(file_path, language=env_var_language_code, initial_prompt="")
                logging.debug("result: " + str(result))
                result_text = result["text"]
                logging.debug("result text: " + result_text)
                # os.path.splitext keeps dots inside the file name intact (unlike file.split("."))
                transcript_file = '/app/transcripts/' + os.path.splitext(file)[0] + '_transcript_' + env_var_language_code + '_.txt'
                logging.debug("transcript file: " + transcript_file)
                with open(transcript_file, 'w') as f:
                    f.write(result_text)
            except Exception as e:
                # log per-file failures at ERROR level and continue with the next file
                logging.error("There was an error: " + str(e))
                logging.error("Stacktrace: " + str(traceback.format_exc()))
except Exception as e:
    logging.error("There was an error: " + str(e))
    logging.error("Stacktrace: " + str(traceback.format_exc()))