60 lines
2.2 KiB
Python
60 lines
2.2 KiB
Python
import whisper
|
|
import os
|
|
import sys
|
|
import logging
|
|
import datetime as dt
|
|
from datetime import datetime
|
|
import traceback
|
|
from pydub import AudioSegment
|
|
|
|
# Required runtime configuration, read once at start-up.  Both variables must
# be set: a missing one raises KeyError before any work is done.
#   LANGUAGE_CODE - compared against "multi" further down; any other value is
#                   passed to the transcriber as the language.
#   WHISPER_MODEL - name handed to whisper.load_model().
env_var_language_code, env_var_whisper_model = [
    os.environ[variable_name]
    for variable_name in ('LANGUAGE_CODE', 'WHISPER_MODEL')
]
|
|
|
|
# Setup Logging
#
# Every record is written to a per-day file under /var/log and mirrored to
# stdout.  The "Start:" prefix is evaluated once, right here, so it carries the
# moment logging was configured on every line, next to the per-record
# %(asctime)s timestamp.
_run_started = dt.datetime.now().isoformat(sep="_")
_log_format = f"Start: {_run_started} | %(asctime)s [%(levelname)s] %(message)s"
_log_file = f"/var/log/{datetime.today():%Y-%m-%d}_-_cron.log"

logging.basicConfig(
    level=logging.DEBUG,  # use logging.INFO here for less verbose output
    format=_log_format,
    handlers=[
        logging.FileHandler(_log_file),
        logging.StreamHandler(sys.stdout),
    ],
)
|
|
|
|
|
|
def get_audio_duration(file_path):
    """Return the duration of the audio file at *file_path* in seconds.

    The file is decoded with ``AudioSegment.from_file``; the segment's length
    (treated as milliseconds) is divided by 1000 to give seconds.
    """
    length_ms = len(AudioSegment.from_file(file_path))
    return length_ms / 1000
|
|
|
|
|
|
def build_transcript_path(file_name, language_code, output_dir='/app/transcripts/'):
    """Return the path of the transcript file for *file_name*.

    Only the final extension is removed (``os.path.splitext``), so a name such
    as ``talk.part1.mp3`` keeps the stem ``talk.part1`` instead of being cut at
    the first dot, which could make different recordings share one transcript.

    Args:
        file_name: Base name of the audio file (no directory part).
        language_code: Language tag embedded in the transcript name.
        output_dir: Directory prefix, expected to end with a path separator.

    Returns:
        ``<output_dir><stem>_transcript_<language_code>_.txt`` as a string.
    """
    stem = os.path.splitext(file_name)[0]
    return f"{output_dir}{stem}_transcript_{language_code}_.txt"


# The Whisper model is loaded on the first file and reused afterwards: loading
# does not depend on the file being processed, so repeating it per file only
# wastes time.  It stays None when there is nothing to transcribe, and a failed
# load is retried on the next file.
model = None

try:
    # Walk the input tree and transcribe every file found, one at a time.
    for root, dirs, files in os.walk('/app/input_files'):
        for file in files:
            # A failure on one file is logged and must not stop the batch.
            try:
                file_path = os.path.join(root, file)
                logging.debug("#" * 32)
                logging.debug(file_path)

                duration = get_audio_duration(file_path)
                logging.debug("Duration: %s Seconds", duration)

                if model is None:
                    model = whisper.load_model(env_var_whisper_model)

                if env_var_language_code == "multi":
                    # No language is passed in "multi" mode; the choice is
                    # left to the transcriber.
                    result = model.transcribe(file_path)
                else:
                    result = model.transcribe(
                        file_path,
                        language=env_var_language_code,
                        initial_prompt="",
                    )
                logging.debug("result: %s", result)
                result_text = result["text"]
                logging.debug("result: %s", result_text)

                transcript_file = build_transcript_path(file, env_var_language_code)
                logging.debug("transcript_file: %s", transcript_file)
                # Explicit UTF-8 so non-ASCII transcripts do not depend on the
                # locale the cron job happens to run under.
                with open(transcript_file, 'w', encoding='utf-8') as f:
                    f.write(result_text)
            except Exception as e:
                # logging.exception records at ERROR level and appends the
                # traceback, so failures remain visible above DEBUG.
                logging.exception("There was an error: %s", e)
except Exception as e:
    # Last-resort guard for failures outside the per-file handling.
    logging.exception("There was an error: %s", e)
|