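"""Batch speech-enhancement runner.

Converts every file in /app/input_files/ with ffmpeg, denoises it with
DeepFilterNet (DFN3) and/or RNNoise depending on the PROCESSING_TOOL
environment variable, and writes the results to /app/output_files/.
Logs go to stdout and to a dated file under /var/log/. The ESPnet and
PyTorch Audio (PTA) variants further down are kept as disabled reference code.
"""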
import logging
import traceback
import json
import os
import sys
from pathlib import Path
from datetime import datetime
import subprocess

# DeepFilterNet (DFN3)
from df.enhance import enhance, init_df, load_audio, save_audio

"""
|
|
# ESPNet
|
|
import soundfile
|
|
from espnet_model_zoo.downloader import ModelDownloader
|
|
from espnet2.bin.enh_inference import SeparateSpeech
|
|
"""
|
|
|
|
"""
|
|
# PyTorch Audio (PTA)
|
|
import torch
|
|
import torchaudio
|
|
import torchaudio.functional as F
|
|
from pesq import pesq
|
|
from pystoi import stoi
|
|
from torchaudio.utils import download_asset
|
|
"""
|
|
|
|
# Setup Logging
logging.basicConfig(
    # level=logging.ERROR,
    # level=logging.INFO,
    level=logging.DEBUG,
    format="%(asctime)s [%(name)s] | %(levelname)s\t| %(message)s",
    handlers=[
        logging.FileHandler("/var/log/" + str(datetime.today().strftime('%Y-%m-%d')) + "_-_cron.log"),
        logging.StreamHandler(sys.stdout)
    ]
)

# Do not show logging for these logging contexts
# print([k for k in logging.Logger.manager.loggerDict])
for v in list(logging.Logger.manager.loggerDict):
    if v.startswith(('smbprotocol', 'spnego', 'smbclient._io', 'urllib3.connectionpool')):
        logging.getLogger(v).disabled = True

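# PROCESSING_TOOL selects the denoiser: "DFN3", "RNNOISE", or "ALL" (default).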
if "PROCESSING_TOOL" in os.environ:
|
|
env_var_processing_tools = r'{}'.format(os.environ['PROCESSING_TOOL'])
|
|
else:
|
|
env_var_processing_tools = "ALL"
|
|
|
|
|
|
def listdir_nohidden(path):
    """Yield the entries of *path*, skipping hidden files (names starting with '.')."""
    for f in os.listdir(path):
        if not f.startswith('.'):
            yield f


# DFN3 (https://github.com/Rikorose/DeepFilterNet)
def run_dfn3(input_file, output_file):
    """Denoise *input_file* with DeepFilterNet 3 and write the result to *output_file*."""
    logging.info("DFN3: processing started")

    tmp_file = "/tmp/" + Path(input_file).stem + ".wav"

    # Convert the input to WAV so DeepFilterNet can read it ('-loglevel 16' = errors only)
    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-acodec', 'pcm_u8', tmp_file]
    subprocess.run(ffmpeg_command)

    model, df_state, _ = init_df()  # Load default model
    audio, _ = load_audio(tmp_file, sr=df_state.sr())  # Resampled to the model's sample rate
    enhanced_audio = enhance(model, df_state, audio)

    save_audio(output_file, enhanced_audio, df_state.sr())

    logging.info("DFN3: processing finished")


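# Example call (illustrative paths, mirroring the main loop below):
#   run_dfn3("/app/input_files/recording.mp3", "/app/output_files/[DFN3]recording.wav")
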
# RNNOISE (https://jmvalin.ca/demo/rnnoise/)
def run_rnnoise(input_file, output_file):
    """Denoise *input_file* with the RNNoise demo binary and write the result to *output_file*."""
    logging.info("RNNOISE: processing started")

    output_raw_file = "/tmp/modded_file.raw"

    # rnnoise_demo expects raw (headerless) 16-bit PCM, so strip the container first
    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', output_raw_file]
    subprocess.run(ffmpeg_command)

    rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', output_raw_file, "/tmp/rnnoise_enhanced.raw"]
    subprocess.run(rnnoise_command)

    # Wrap the denoised raw stream back into a WAV container (48 kHz, stereo)
    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', '/tmp/rnnoise_enhanced.raw', output_file]
    subprocess.run(ffmpeg_command)

    logging.info("RNNOISE: processing finished")


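# NOTE: The ESPnet variant below is kept as disabled reference code; it relies on
# the disabled ESPnet import block near the top of this file.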
# ESPNET (https://github.com/espnet/espnet)
"""
logging.info("ESPNET: processing started")
input_file = "/app/audio/" + file_name
output_file = "/app/audio/modded_" + file_name

ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-ar', '16k', '-ac', '2', '-acodec', 'pcm_s16le', output_file]
subprocess.run(ffmpeg_command)

def enhance_speech(input_file, output_file):
    # Load the input audio file
    mixwav_mc, sr = soundfile.read(input_file)

    np.set_printoptions(threshold=sys.maxsize)
    # logging.debug(arr)
    logging.debug("mixwav_mc")
    logging.debug(mixwav_mc)
    logging.debug("mixwav_mc[1]")
    logging.debug(mixwav_mc[1])

    # mixwav_mc = mixwav_mc[None, :]

    # Download the ESPnet model for speech enhancement
    model_tag = "espnet/Wangyou_Zhang_chime4_enh_train_enh_beamformer_mvdr_raw"
    model_downloader = ModelDownloader()
    model_config = model_downloader.download_and_unpack(model_tag)

    # Initialize the speech enhancement model
    enh_model_mc = SeparateSpeech(
        train_config=model_config["train_config"],
        model_file=model_config["model_file"],
        normalize_segment_scale=False,
        show_progressbar=True,
        ref_channel=4,
        normalize_output_wav=True,
        device="cpu",
    )

    # Perform speech enhancement
    enhanced_waveform = enh_model_mc(mixwav_mc, sr)

    # Save the enhanced waveform to a new audio file
    soundfile.write(output_file, enhanced_waveform, sr)
"""


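# NOTE: The PyTorch Audio (PTA) variant below is likewise disabled; it references
# smbclient, get_title_from_dbx_path and output_audio_files, which are not defined
# in this file.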
# PTA (https://pytorch.org/audio/stable/index.html)
"""
logging.info("PTA: processing started")
rnnoise_out_filename = "[PTA]" + get_title_from_dbx_path("/app/audio/" + file_name.lower().replace(".wav", "").upper() + ".DBX") + "_-_" + file_name

logging.debug("Torch Version:" + torch.__version__)
logging.info("Torch (Audio) Version: " + torchaudio.__version__)

if rnnoise_out_filename not in output_audio_files:
    input_file = "/app/audio/" + file_name
    output_raw_file = "/app/audio/modded_file.raw"
    output_wav_file = "/app/audio/modded_file.wav"

    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', output_raw_file]
    subprocess.run(ffmpeg_command)

    rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', output_raw_file, "/app/audio/rnnoise_enhanced.raw"]
    subprocess.run(rnnoise_command)

    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', '/app/audio/rnnoise_enhanced.raw', output_wav_file]
    subprocess.run(ffmpeg_command)

    with open(output_wav_file, 'rb') as f:
        tmpFile = f.read()
    with smbclient.open_file("\\transfer-srv.office.radioteleffh.de\\transfer\\Technik\\sound_processing\\" + rnnoise_out_filename, mode="wb") as fd:
        fd.write(tmpFile)
    logging.info("PTA: processing finished")
else:
    logging.info("PTA: File was already processed in an earlier iteration.")
"""


logging.debug("Files in /app/input_files/:")
|
|
logging.debug(os.listdir("/app/input_files/"))
|
|
|
|
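# Main loop: run the selected tool(s) on every non-hidden file in /app/input_files/.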
for file_name in listdir_nohidden("/app/input_files/"):
|
|
try:
|
|
logging.info("########################\t NEW FILE\t########################")
|
|
logging.info("Filename: " + file_name)
|
|
|
|
input_file = "/app/input_files/" + file_name
|
|
|
|
logging.debug("env_var_processing_tools: " + env_var_processing_tools)
|
|
|
|
if env_var_processing_tools == "DNF3":
|
|
run_dfn3(input_file, "/app/output_files/" + Path(input_file).stem + ".wav")
|
|
elif env_var_processing_tools == "RNNOISE":
|
|
run_rrnoise(input_file, "/app/output_files/" + Path(input_file).stem + ".wav")
|
|
elif env_var_processing_tools == "ALL":
|
|
run_dfn3(input_file, "/app/output_files/[DFN3]" + Path(input_file).stem + ".wav")
|
|
run_rrnoise(input_file, "/app/output_files/[RNNOISE]" + Path(input_file).stem + ".wav")
|
|
else:
|
|
logging.warning("The given processing tool -->" + env_var_processing_tools + "<-- was not found, please try again with a valid tool.")
|
|
|
|
except Exception as e:
|
|
logging.error("There was an error: " + str(e))
|
|
logging.error("Stacktrace: " + str(traceback.format_exc()))
|