This commit is contained in:
Niklas Müller 2024-12-01 18:12:39 +01:00
commit bf12747492
7 changed files with 290 additions and 0 deletions

200
runner.py Normal file
View file

@ -0,0 +1,200 @@
import smbclient
import logging
import traceback
import json
import os
import sys
from pathlib import Path
from datetime import datetime
from lxml import etree
import subprocess
# DeepFilterNet (DFN3)
from df.enhance import enhance, init_df, load_audio, save_audio
"""
# ESPNet
import soundfile
from espnet_model_zoo.downloader import ModelDownloader
from espnet2.bin.enh_inference import SeparateSpeech
"""
"""
# PyTorch Audio (PTA)
import torch
import torchaudio
import torchaudio.functional as F
from pesq import pesq
from pystoi import stoi
from torchaudio.utils import download_asset
"""
# Setup Logging
# Root logger configuration: every record goes both to a per-day log file
# under /var/log/ and to stdout.
logging.basicConfig(
    # DEBUG is the most verbose setting; use logging.INFO or logging.ERROR
    # here to reduce the amount of output.
    level=logging.DEBUG,
    format="%(asctime)s [%(name)s] | %(levelname)s\t| %(message)s",
    handlers=[
        # The file name carries the current date, e.g. 2024-12-01_-_cron.log
        logging.FileHandler(
            f"/var/log/{datetime.today().strftime('%Y-%m-%d')}_-_cron.log"
        ),
        logging.StreamHandler(sys.stdout),
    ],
)
# Do not show logging for those logging contexts
# print([k for k in logging.Logger.manager.loggerDict])
# Iterate over a snapshot of the registered logger names and disable every
# logger whose name starts with one of the prefixes below.
for logger_name in list(logging.Logger.manager.loggerDict):
    if logger_name.startswith(
        ('smbprotocol', 'spnego', 'smbclient._io', 'urllib3.connectionpool')
    ):
        logging.getLogger(logger_name).disabled = True
# Processing tool selected through the PROCESSING_TOOL environment variable;
# when the variable is not set, "ALL" is used.
env_var_processing_tools = os.environ.get("PROCESSING_TOOL", "ALL")
def listdir_nohidden(path):
    """Yield the entries of ``path`` whose names do not start with a dot.

    The directory is read with ``os.listdir`` when iteration begins, not
    when the function is called.
    """
    yield from (
        entry for entry in os.listdir(path) if not entry.startswith('.')
    )
# DFN3 (https://github.com/Rikorose/DeepFilterNet)
def run_dfn3(input_file, output_file):
    """Denoise ``input_file`` with DeepFilterNet and save it to ``output_file``.

    The input is first converted by ffmpeg into a temporary WAV file under
    ``/tmp/`` (named after the input file's stem), which is then loaded,
    enhanced with the default DeepFilterNet model and written out.

    Args:
        input_file: Path of the audio file to process.
        output_file: Path the enhanced audio is written to.

    Raises:
        subprocess.CalledProcessError: If the ffmpeg conversion fails.
    """
    logging.info("DFN3: processing started")
    # The path passed by the caller is used as-is. It was previously
    # overwritten with a path built from the global ``file_name``, which
    # ignored the argument and broke any call made outside the main loop.
    tmp_file = "/tmp/" + Path(input_file).stem + ".wav"
    # '-loglevel 16' is ffmpeg's numeric "error" level, '-y' overwrites an
    # existing temporary file.
    # NOTE(review): 'pcm_u8' converts the audio to unsigned 8-bit PCM before
    # enhancement -- confirm this is intended.
    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-acodec', 'pcm_u8', tmp_file]
    # check=True: stop here if the conversion failed instead of continuing
    # with a missing or stale temporary file.
    subprocess.run(ffmpeg_command, check=True)
    model, df_state, _ = init_df()  # Load default model
    audio, _ = load_audio(tmp_file, sr=df_state.sr())
    enhanced_audio = enhance(model, df_state, audio)
    save_audio(output_file, enhanced_audio, df_state.sr())
    logging.info("DFN3: processing finished")
# RNNOISE (https://jmvalin.ca/demo/rnnoise/)
def run_rrnoise(input_file, output_file):
    """Denoise ``input_file`` with the RNNoise demo binary.

    Three external commands run in sequence:

    1. ffmpeg decodes the input to raw signed 16-bit little-endian PCM.
    2. ``/app/rnnoise/examples/rnnoise_demo`` processes the raw file.
    3. ffmpeg reads the processed raw data as 48 kHz, 2-channel s16le and
       encodes it to ``output_file``.

    The intermediate files use fixed names in ``/tmp/`` and are overwritten
    on every call.

    Args:
        input_file: Path of the audio file to process.
        output_file: Path the enhanced audio is written to.

    Raises:
        subprocess.CalledProcessError: If any of the three commands fails.
    """
    logging.info("RNNOISE: processing started")
    raw_input_file = "/tmp/modded_file.raw"
    raw_enhanced_file = "/tmp/rnnoise_enhanced.raw"

    # NOTE(review): this step does not force a sample rate or channel count,
    # while the last step assumes 48 kHz / 2 channels -- confirm that the
    # inputs and rnnoise_demo's expected format actually match.
    decode_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', raw_input_file]
    # check=True on every step: a failed command must not be followed by
    # processing of missing or stale intermediate files.
    subprocess.run(decode_command, check=True)

    rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', raw_input_file, raw_enhanced_file]
    subprocess.run(rnnoise_command, check=True)

    encode_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', raw_enhanced_file, output_file]
    subprocess.run(encode_command, check=True)
    logging.info("RNNOISE: processing finished")
# ESPNET (https://github.com/espnet/espnet) -- currently disabled: the code below is wrapped in a string literal and is never executed
"""
logging.info("ESPNET: processing started")
input_file = "/app/audio/" + file_name
output_file = "/app/audio/modded_" + file_name
ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-ar', '16k', '-ac', '2', '-acodec', 'pcm_s16le', output_file]
subprocess.run(ffmpeg_command)
def enhance_speech(input_file, output_file):
# Load the input audio file
mixwav_mc, sr = soundfile.read(input_file)
np.set_printoptions(threshold=sys.maxsize)
# logging.debug(arr)
logging.debug("mixwav_mc")
logging.debug(mixwav_mc)
logging.debug("mixwav_mc[1]")
logging.debug(mixwav_mc[1])
# mixwav_mc = mixwav_mc[None, :]
# Download the ESPnet model for speech enhancement
model_tag = "espnet/Wangyou_Zhang_chime4_enh_train_enh_beamformer_mvdr_raw"
model_downloader = ModelDownloader()
model_config = model_downloader.download_and_unpack(model_tag)
# Initialize the speech enhancement model
enh_model_mc = SeparateSpeech(
train_config=model_config["train_config"],
model_file=model_config["model_file"],
normalize_segment_scale=False,
show_progressbar=True,
ref_channel=4,
normalize_output_wav=True,
device="cpu",
)
# Perform speech enhancement
enhanced_waveform = enh_model_mc(mixwav_mc, sr)
# Save the enhanced waveform to a new audio file
soundfile.write(output_file, enhanced_waveform, sr)
"""
# PTA (https://pytorch.org/audio/stable/index.html) -- currently disabled: the code below is wrapped in a string literal and is never executed
"""
logging.info("PTA: processing started")
rnnoise_out_filename = "[PTA]" + get_title_from_dbx_path("/app/audio/" + file_name.lower().replace(".wav", "").upper() + ".DBX") + "_-_" + file_name
logging.debug("Torch Version:" + torch.__version__)
logging.info("Torch (Audio) Version: " + torchaudio.__version__)
if rnnoise_out_filename not in output_audio_files:
input_file = "/app/audio/" + file_name
output_raw_file = "/app/audio/modded_file.raw"
output_wav_file = "/app/audio/modded_file.wav"
ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', output_raw_file]
subprocess.run(ffmpeg_command)
rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', output_raw_file, "/app/audio/rnnoise_enhanced.raw"]
subprocess.run(rnnoise_command)
ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', '/app/audio/rnnoise_enhanced.raw', output_wav_file]
subprocess.run(ffmpeg_command)
with open(output_wav_file, 'rb') as f:
tmpFile = f.read()
with smbclient.open_file("\\transfer-srv.office.radioteleffh.de\\transfer\\Technik\\sound_processing\\" + rnnoise_out_filename, mode="wb") as fd:
fd.write(tmpFile)
logging.info("PTA: processing finished")
else:
logging.info("PTA: File was already processed in an earlier iteration.")
"""
# Main loop: run the selected tool(s) on every non-hidden entry of
# /app/input_files/ and write the results to /app/output_files/.
logging.debug("Files in /app/input_files/:")
logging.debug(os.listdir("/app/input_files/"))
for file_name in listdir_nohidden("/app/input_files/"):
    # Each file is handled inside its own try block so that one failing file
    # is logged and does not stop the remaining files from being processed.
    try:
        logging.info("########################\t NEW FILE\t########################")
        logging.info("Filename: " + file_name)
        input_file = "/app/input_files/" + file_name
        output_stem = Path(input_file).stem
        logging.debug("env_var_processing_tools: " + env_var_processing_tools)
        # The tool is called DFN3 everywhere else in this file; this branch
        # used to test only for the misspelling "DNF3", so PROCESSING_TOOL=DFN3
        # ended up in the "not found" warning. The misspelling is still
        # accepted so that existing configurations keep working.
        if env_var_processing_tools in ("DFN3", "DNF3"):
            run_dfn3(input_file, "/app/output_files/" + output_stem + ".wav")
        elif env_var_processing_tools == "RNNOISE":
            run_rrnoise(input_file, "/app/output_files/" + output_stem + ".wav")
        elif env_var_processing_tools == "ALL":
            # With both tools active, a prefix keeps the two results apart.
            run_dfn3(input_file, "/app/output_files/[DFN3]" + output_stem + ".wav")
            run_rrnoise(input_file, "/app/output_files/[RNNOISE]" + output_stem + ".wav")
        else:
            logging.warning("The given processing tool -->" + env_var_processing_tools + "<-- was not found, please try again with a valid tool.")
    except Exception as e:
        logging.error("There was an error: " + str(e))
        logging.error("Stacktrace: " + str(traceback.format_exc()))