"""Batch-transcribe every file under /app/input_files with OpenAI Whisper.

Configuration is read from two required environment variables:

* ``LANGUAGE_CODE`` -- language passed to Whisper, or the literal ``"multi"``
  to call ``transcribe`` without a ``language`` argument.
* ``WHISPER_MODEL`` -- model name handed to ``whisper.load_model``.

One text file per input file is written to /app/transcripts. A failure on a
single file is logged and does not stop the remaining files.
"""
import whisper
import os
import sys
import logging
import datetime as dt
from datetime import datetime
import traceback
from pydub import AudioSegment

# Both variables are mandatory: a missing one raises KeyError at start-up.
env_var_language_code = os.environ['LANGUAGE_CODE']
env_var_whisper_model = os.environ['WHISPER_MODEL']

# Setup Logging
# The "Start:" prefix is evaluated once, here, so every record of a run
# carries the same start timestamp in addition to its own %(asctime)s.
logging.basicConfig(
    # DEBUG so the full Whisper result is logged; use logging.INFO for
    # quieter logs.
    level=logging.DEBUG,
    format="Start: " + str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("/var/log/" + str(datetime.today().strftime('%Y-%m-%d')) + "_-_cron.log"),
        logging.StreamHandler(sys.stdout),
    ],
)


def get_audio_duration(file_path):
    """Return the duration of the audio file at *file_path* in seconds.

    The file is decoded with ``AudioSegment.from_file``; the segment length
    (milliseconds, per the pydub documentation) is divided by 1000.
    """
    audio = AudioSegment.from_file(file_path)
    duration_seconds = len(audio) / 1000
    return duration_seconds


def _log_failure(error):
    """Log *error* and the current traceback at ERROR severity."""
    # ERROR rather than DEBUG so failures stay visible if the log level is
    # ever raised to INFO.
    logging.error("There was an error: " + str(error))
    logging.error("Stacktrace: " + str(traceback.format_exc()))


def _transcript_path(file_name):
    """Return the transcript path for the input file called *file_name*."""
    # splitext removes only the final extension, so "a.b.mp3" becomes "a.b"
    # instead of being truncated to "a" (which made distinct inputs collide).
    stem = os.path.splitext(file_name)[0]
    return '/app/transcripts/' + stem + '_transcript_' + env_var_language_code + '_.txt'


def _transcribe_file(model, file_path):
    """Run *model* on *file_path* and return Whisper's result dict."""
    if env_var_language_code == "multi":
        # No language is passed, leaving language selection to Whisper.
        return model.transcribe(file_path)
    return model.transcribe(file_path, language=env_var_language_code, initial_prompt="")


def _transcribe_input_files():
    """Walk /app/input_files and write one transcript per file found."""
    # Loaded lazily on the first file that decodes, then reused: loading is
    # loop-invariant, and nothing is loaded when there are no usable inputs.
    model = None
    try:
        for root, dirs, files in os.walk('/app/input_files'):
            for file in files:
                try:
                    file_path = os.path.join(root, file)
                    logging.debug("#" * 32)
                    logging.debug(file_path)

                    # Decoding first means a file pydub cannot read is
                    # skipped before any model work is attempted.
                    duration = get_audio_duration(file_path)
                    logging.debug("Duration: " + str(duration) + " Seconds")

                    if model is None:
                        model = whisper.load_model(env_var_whisper_model)

                    result = _transcribe_file(model, file_path)
                    logging.debug("result: " + str(result))
                    result_text = result["text"]
                    logging.debug("result: " + result_text)

                    transcript_file = _transcript_path(file)
                    logging.debug("result: " + str(transcript_file))
                    # Explicit UTF-8 so non-ASCII transcripts are written
                    # correctly whatever the process locale is.
                    with open(transcript_file, 'w', encoding="utf-8") as f:
                        f.write(result_text)
                except Exception as e:
                    # Best effort: one bad file must not stop the batch.
                    _log_failure(e)
    except Exception as e:
        _log_failure(e)


# Run unconditionally, as the original script did, so behaviour is the same
# whether the file is executed directly or imported.
_transcribe_input_files()