"""Denoise every non-hidden file found in /app/input_files/.

The tool is selected through the PROCESSING_TOOL environment variable
("DFN3", "RNNOISE" or "ALL"; "ALL" is used when the variable is unset).
Results are written as WAV files to /app/output_files/.
"""
import json
import logging
import os
import subprocess
import sys
import traceback
from datetime import datetime
from pathlib import Path

# DeepFilterNet (DFN3)
from df.enhance import enhance, init_df, load_audio, save_audio

# Disabled experiment, kept as a no-op string literal.
"""
# ESPNet
import soundfile
from espnet_model_zoo.downloader import ModelDownloader
from espnet2.bin.enh_inference import SeparateSpeech
"""

# Disabled experiment, kept as a no-op string literal.
"""
# PyTorch Audio (PTA)
import torch
import torchaudio
import torchaudio.functional as F
from pesq import pesq
from pystoi import stoi
from torchaudio.utils import download_asset
"""

# Setup Logging: one log file per day plus a copy on stdout.
logging.basicConfig(
    # level=logging.ERROR,
    # level=logging.INFO,
    level=logging.DEBUG,
    format="%(asctime)s [%(name)s] | %(levelname)s\t| %(message)s",
    handlers=[
        logging.FileHandler("/var/log/" + datetime.today().strftime('%Y-%m-%d') + "_-_cron.log"),
        logging.StreamHandler(sys.stdout),
    ],
)

# Do not show logging for those logging contexts
# print([k for k in logging.Logger.manager.loggerDict])
_SILENCED_LOGGER_PREFIXES = ('smbprotocol', 'spnego', 'smbclient._io', 'urllib3.connectionpool')
# Iterate over a copy: getLogger() may touch loggerDict while we walk it.
for logger_name in list(logging.Logger.manager.loggerDict):
    if logger_name.startswith(_SILENCED_LOGGER_PREFIXES):
        logging.getLogger(logger_name).disabled = True

# Which tool(s) to run; defaults to "ALL" when PROCESSING_TOOL is not set.
env_var_processing_tools = os.environ.get('PROCESSING_TOOL', "ALL")


def listdir_nohidden(path):
    """Yield the entries of *path* whose names do not start with a dot."""
    for entry in os.listdir(path):
        if not entry.startswith('.'):
            yield entry


# DFN3 (https://github.com/Rikorose/DeepFilterNet)
def run_dfn3(input_file, output_file):
    """Enhance *input_file* with DeepFilterNet and write it to *output_file*.

    The input is first converted by ffmpeg into a temporary WAV file under
    /tmp/, which is removed again once processing is done (or has failed).

    Args:
        input_file: Path of the audio file to process.
        output_file: Path the enhanced audio is saved to.

    Raises:
        subprocess.CalledProcessError: If the ffmpeg conversion fails.
    """
    logging.info("DFN3: processing started")
    tmp_file = "/tmp/" + Path(input_file).stem + ".wav"
    # NOTE(review): pcm_u8 is 8-bit PCM; confirm this bit depth is intended
    # for the audio handed to the model.
    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-acodec', 'pcm_u8', tmp_file]
    try:
        # check=True: do not continue with a missing or stale temp file.
        subprocess.run(ffmpeg_command, check=True)
        model, df_state, _ = init_df()  # Load default model
        audio, _ = load_audio(tmp_file, sr=df_state.sr())
        enhanced_audio = enhance(model, df_state, audio)
        save_audio(output_file, enhanced_audio, df_state.sr())
    finally:
        # The temp name depends on the input, so it would pile up otherwise.
        try:
            os.remove(tmp_file)
        except FileNotFoundError:
            pass
        except OSError as cleanup_error:
            # Never let a cleanup problem hide the real processing error.
            logging.warning("DFN3: could not remove %s: %s", tmp_file, cleanup_error)
    logging.info("DFN3: processing finished")


# RNNOISE (https://jmvalin.ca/demo/rnnoise/)
def run_rrnoise(input_file, output_file):
    """Denoise *input_file* with the rnnoise_demo binary into *output_file*.

    Steps: ffmpeg decodes the input to raw signed 16-bit PCM, rnnoise_demo
    filters that raw stream, and ffmpeg wraps the result into *output_file*.
    The intermediate files use fixed names under /tmp/ and are overwritten
    on every call.

    Args:
        input_file: Path of the audio file to process.
        output_file: Path the denoised audio is written to.

    Raises:
        subprocess.CalledProcessError: If ffmpeg or rnnoise_demo fails.
    """
    logging.info("RNNOISE: processing started")
    output_raw_file = "/tmp/modded_file.raw"
    enhanced_raw_file = "/tmp/rnnoise_enhanced.raw"

    # NOTE(review): the decode step does not force a sample rate or channel
    # count, while the encode step below declares 48 kHz / 2 channels;
    # confirm this matches what rnnoise_demo expects and produces.
    decode_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', output_raw_file]
    rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', output_raw_file, enhanced_raw_file]
    encode_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', enhanced_raw_file, output_file]

    # check=True: a failed step must not let the next one pick up the stale
    # intermediate file left behind by a previous input.
    for command in (decode_command, rnnoise_command, encode_command):
        subprocess.run(command, check=True)
    logging.info("RNNOISE: processing finished")


# ESPNET (https://github.com/espnet/espnet)
# Disabled experiment, kept as a no-op string literal.
"""
logging.info("ESPNET: processing started")
input_file = "/app/audio/" + file_name
output_file = "/app/audio/modded_" + file_name
ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-ar', '16k', '-ac', '2', '-acodec', 'pcm_s16le', output_file]
subprocess.run(ffmpeg_command)

def enhance_speech(input_file, output_file):
    # Load the input audio file
    mixwav_mc, sr = soundfile.read(input_file)
    np.set_printoptions(threshold=sys.maxsize)
    # logging.debug(arr)
    logging.debug("mixwav_mc")
    logging.debug(mixwav_mc)
    logging.debug("mixwav_mc[1]")
    logging.debug(mixwav_mc[1])
    # mixwav_mc = mixwav_mc[None, :]

    # Download the ESPnet model for speech enhancement
    model_tag = "espnet/Wangyou_Zhang_chime4_enh_train_enh_beamformer_mvdr_raw"
    model_downloader = ModelDownloader()
    model_config = model_downloader.download_and_unpack(model_tag)

    # Initialize the speech enhancement model
    enh_model_mc = SeparateSpeech(
        train_config=model_config["train_config"],
        model_file=model_config["model_file"],
        normalize_segment_scale=False,
        show_progressbar=True,
        ref_channel=4,
        normalize_output_wav=True,
        device="cpu",
    )

    # Perform speech enhancement
    enhanced_waveform = enh_model_mc(mixwav_mc, sr)

    # Save the enhanced waveform to a new audio file
    soundfile.write(output_file, enhanced_waveform, sr)
"""

# PTA (https://pytorch.org/audio/stable/index.html)
# Disabled experiment, kept as a no-op string literal.
"""
logging.info("PTA: processing started")
rnnoise_out_filename = "[PTA]" + get_title_from_dbx_path("/app/audio/" + file_name.lower().replace(".wav", "").upper() + ".DBX") + "_-_" + file_name
logging.debug("Torch Version:" + torch.__version__)
logging.info("Torch (Audio) Version: " + torchaudio.__version__)
if rnnoise_out_filename not in output_audio_files:
    input_file = "/app/audio/" + file_name
    output_raw_file = "/app/audio/modded_file.raw"
    output_wav_file = "/app/audio/modded_file.wav"
    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', output_raw_file]
    subprocess.run(ffmpeg_command)
    rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', output_raw_file, "/app/audio/rnnoise_enhanced.raw"]
    subprocess.run(rnnoise_command)
    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', '/app/audio/rnnoise_enhanced.raw', output_wav_file]
    subprocess.run(ffmpeg_command)
    with open(output_wav_file, 'rb') as f:
        tmpFile = f.read()
    with smbclient.open_file("\\transfer-srv.office.radioteleffh.de\\transfer\\Technik\\sound_processing\\" + rnnoise_out_filename, mode="wb") as fd:
        fd.write(tmpFile)
    logging.info("PTA: processing finished")
else:
    logging.info("PTA: File was already processed in an earlier iteration.")
"""

logging.debug("Files in /app/input_files/:")
logging.debug(os.listdir("/app/input_files/"))

for file_name in listdir_nohidden("/app/input_files/"):
    # One failing file must not stop the remaining ones, hence the broad
    # except at this top-level boundary.
    try:
        logging.info("########################\t NEW FILE\t########################")
        logging.info("Filename: " + file_name)
        input_file = "/app/input_files/" + file_name
        output_stem = Path(input_file).stem
        logging.debug("env_var_processing_tools: " + env_var_processing_tools)

        # "DNF3" is the misspelling the script used to require; it is still
        # accepted so existing deployments keep working.
        if env_var_processing_tools in ("DFN3", "DNF3"):
            run_dfn3(input_file, "/app/output_files/" + output_stem + ".wav")
        elif env_var_processing_tools == "RNNOISE":
            run_rrnoise(input_file, "/app/output_files/" + output_stem + ".wav")
        elif env_var_processing_tools == "ALL":
            # Prefix the outputs so both results can live side by side.
            run_dfn3(input_file, "/app/output_files/[DFN3]" + output_stem + ".wav")
            run_rrnoise(input_file, "/app/output_files/[RNNOISE]" + output_stem + ".wav")
        else:
            logging.warning("The given processing tool -->" + env_var_processing_tools + "<-- was not found, please try again with a valid tool.")
    except Exception as e:
        logging.error("There was an error: " + str(e))
        logging.error("Stacktrace: " + str(traceback.format_exc()))