INIT
commit 7325f650b6
6 changed files with 109 additions and 0 deletions
.gitignore (vendored, new file, 2 additions)
@@ -0,0 +1,2 @@
.vscode
.DS_Store
Dockerfile (new file, 22 additions)
@@ -0,0 +1,22 @@
FROM python:3.11-slim

# set the working directory
WORKDIR /app
RUN mkdir /app/input_files
RUN mkdir /app/transcripts
RUN apt-get update

RUN apt-get install -y ffmpeg

# install dependencies
COPY ./requirements.txt /app
RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --no-cache-dir --upgrade -r requirements.txt

# copy model to container
COPY ./large-v3.pt /root/.cache/whisper/large-v3.pt

# copy the scripts to the /app folder
COPY ./init.sh /app
COPY ./runner.py /app

CMD ["bash", "init.sh"]
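For illustration only (not part of the commit): the "# copy model to container" step places large-v3.pt under /root/.cache/whisper/, which appears to be whisper's default model cache, so loading the model should not trigger a download at container runtime. A minimal sketch, assuming that default cache location:

```
import os
import whisper

# The image ships large-v3.pt in /root/.cache/whisper/ (see the COPY above),
# so load_model("large-v3") should read the weights from disk rather than
# downloading them when the container runs.
print(os.listdir(os.path.expanduser("~/.cache/whisper")))  # expect ['large-v3.pt']
model = whisper.load_model("large-v3")
```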
README.md (new file, 18 additions)
@@ -0,0 +1,18 @@
# STT-Function
With the Speech-to-Text (STT) Function you can transcribe a file ("convert" an audio/video file into text).
Internally, Whisper from OpenAI (https://github.com/openai/whisper) is used to transcribe the audio file.


## Structure
* The container has two folders attached: the input folder with the files that should be transcribed, and the output path where the transcript should be saved.


## Setup
Make sure [Podman](https://podman.io/docs/installation) or [Docker](https://docs.docker.com/get-docker/) is installed.

Download the model into the folder where you will build the container image. [Download Link](https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt) or run `wget https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt`

```
podman build -t stt-function .
podman run -e LANGUAGE_CODE='de' -e WHISPER_MODEL='tiny' -v '/path/to/audio_video/file/':/app/input_files/ -v /output_path/of/transcript/:/app/transcripts/ --name stt-function_container --rm -t stt-function
```
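For illustration only (not part of the commit): the README states that Whisper handles the transcription, and at its core that is a model load plus a transcribe call, as sketched below. The model name, language, and file name are placeholder values; the full loop is in runner.py further down.

```
import whisper

# Sketch of the transcription step the README describes; runner.py below
# adds the folder walk, logging, and language handling around it.
model = whisper.load_model("tiny")                       # WHISPER_MODEL in the container
result = model.transcribe("example.mp3", language="de")  # LANGUAGE_CODE in the container
print(result["text"])
```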
init.sh (executable, new file, 4 additions)
@@ -0,0 +1,4 @@
#!/bin/bash

env >> /etc/environment
/usr/local/bin/python /app/runner.py
requirements.txt (new file, 3 additions)
@@ -0,0 +1,3 @@
openai-whisper
pydub
ffmpeg
runner.py (new file, 60 additions)
@@ -0,0 +1,60 @@
import whisper
import os
import sys
import logging
import datetime as dt
from datetime import datetime
import traceback
from pydub import AudioSegment

env_var_language_code = os.environ['LANGUAGE_CODE']
env_var_whisper_model = os.environ['WHISPER_MODEL']

# Setup Logging
logging.basicConfig(
    level=logging.DEBUG,
    # level=logging.INFO,
    format="Start: " + str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("/var/log/" + str(datetime.today().strftime('%Y-%m-%d')) + "_-_cron.log"),
        logging.StreamHandler(sys.stdout)
    ]
)


def get_audio_duration(file_path):
    audio = AudioSegment.from_file(file_path)
    duration_seconds = len(audio) / 1000
    return duration_seconds


try:
    for root, dirs, files in os.walk('/app/input_files'):
        for file in files:
            try:
                file_path = os.path.join(root, file)
                logging.debug("#" * 32)
                logging.debug(file_path)

                duration = get_audio_duration(file_path)
                logging.debug("Duration: " + str(duration) + " Seconds")

                model = whisper.load_model(env_var_whisper_model)
                if env_var_language_code == "multi":
                    result = model.transcribe(file_path)
                else:
                    result = model.transcribe(file_path, language=env_var_language_code, initial_prompt="")
                logging.debug("result: " + str(result))
                result_text = result["text"]
                logging.debug("result: " + result_text)

                transcript_file = '/app/transcripts/' + file.split(".")[0] + '_transcript_' + env_var_language_code + '_.txt'
                logging.debug("result: " + str(transcript_file))
                with open(transcript_file, 'w') as f:
                    f.write(result_text)
            except Exception as e:
                logging.debug("There was an error: " + str(e))
                logging.debug("Stacktrace: " + str(traceback.format_exc()))
except Exception as e:
    logging.debug("There was an error: " + str(e))
    logging.debug("Stacktrace: " + str(traceback.format_exc()))
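A usage note (not part of the commit), since the one-line transcript path expression above is easy to misread. A hypothetical walk-through, assuming an input file named interview.mp4 and LANGUAGE_CODE='de':

```
# Hypothetical example of the transcript_file naming scheme from runner.py:
file = "interview.mp4"
env_var_language_code = "de"
transcript_file = '/app/transcripts/' + file.split(".")[0] + '_transcript_' + env_var_language_code + '_.txt'
print(transcript_file)  # /app/transcripts/interview_transcript_de_.txt
```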