INIT
This commit is contained in:
commit
bf12747492
7 changed files with 290 additions and 0 deletions
200
runner.py
Normal file
200
runner.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
import smbclient
|
||||
import logging
|
||||
import traceback
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from lxml import etree
|
||||
import subprocess
|
||||
|
||||
# DeepFilterNet (DFN3)
|
||||
from df.enhance import enhance, init_df, load_audio, save_audio
|
||||
|
||||
"""
|
||||
# ESPNet
|
||||
import soundfile
|
||||
from espnet_model_zoo.downloader import ModelDownloader
|
||||
from espnet2.bin.enh_inference import SeparateSpeech
|
||||
"""
|
||||
|
||||
"""
|
||||
# PyTorch Audio (PTA)
|
||||
import torch
|
||||
import torchaudio
|
||||
import torchaudio.functional as F
|
||||
from pesq import pesq
|
||||
from pystoi import stoi
|
||||
from torchaudio.utils import download_asset
|
||||
"""
|
||||
|
||||
# Setup Logging
# Every record goes both to a per-day file under /var/log and to stdout.
logging.basicConfig(
    handlers=[
        logging.FileHandler("/var/log/" + datetime.today().strftime('%Y-%m-%d') + "_-_cron.log"),
        logging.StreamHandler(sys.stdout),
    ],
    format="%(asctime)s [%(name)s] | %(levelname)s\t| %(message)s",
    # Alternative verbosity levels: logging.ERROR, logging.INFO
    level=logging.DEBUG,
)
|
||||
|
||||
# Do not show logging for those logging contexts
# print([k for k in logging.Logger.manager.loggerDict])
# Iterate over a snapshot of the registered logger names and disable every
# logger whose name starts with one of the chatty third-party prefixes.
for logger_name in list(logging.Logger.manager.loggerDict):
    if logger_name.startswith(('smbprotocol', 'spnego', 'smbclient._io', 'urllib3.connectionpool')):
        logging.getLogger(logger_name).disabled = True
|
||||
|
||||
# Tool selector taken from the PROCESSING_TOOL environment variable;
# falls back to "ALL" when the variable is not set.
env_var_processing_tools = os.environ.get('PROCESSING_TOOL', "ALL")
|
||||
|
||||
|
||||
def listdir_nohidden(path):
    """Lazily yield the entries of ``path`` whose names do not begin with a dot.

    The directory is listed with ``os.listdir`` when iteration starts; names
    are yielded in the order ``os.listdir`` returns them.
    """
    yield from filter(lambda entry: not entry.startswith('.'), os.listdir(path))
|
||||
|
||||
|
||||
# DFN3 (https://github.com/Rikorose/DeepFilterNet)
def run_dfn3(input_file, output_file):
    """Enhance ``input_file`` with DeepFilterNet and save it to ``output_file``.

    The input is first transcoded by ffmpeg to a temporary WAV file, which is
    then loaded, passed through ``enhance`` with the default model returned by
    ``init_df`` and written with ``save_audio``.

    Args:
        input_file: Path of the audio file to process.
        output_file: Path the enhanced audio is written to.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Local import: only this function needs a scratch directory.
    import tempfile

    logging.info("DFN3: processing started")

    # A private scratch directory avoids name clashes between runs (and with
    # inputs that already live in /tmp) and is removed on exit, even on error.
    with tempfile.TemporaryDirectory() as scratch_dir:
        tmp_file = os.path.join(scratch_dir, Path(input_file).stem + ".wav")

        # NOTE(review): pcm_u8 stores 8-bit samples; confirm this reduced bit
        # depth is intended for the intermediate file.
        ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-acodec', 'pcm_u8', tmp_file]
        # check=True: stop here when the transcode fails instead of carrying
        # on with a missing or partial WAV file.
        subprocess.run(ffmpeg_command, check=True)

        model, df_state, _ = init_df()  # Load default model
        audio, _ = load_audio(tmp_file, sr=df_state.sr())
        enhanced_audio = enhance(model, df_state, audio)

        save_audio(output_file, enhanced_audio, df_state.sr())

    logging.info("DFN3: processing finished")
|
||||
|
||||
|
||||
# RNNOISE (https://jmvalin.ca/demo/rnnoise/)
def run_rrnoise(input_file, output_file):
    """Denoise ``input_file`` with the RNNoise demo binary.

    Pipeline: ffmpeg decodes the input to headerless 16-bit PCM,
    ``/app/rnnoise/examples/rnnoise_demo`` processes that raw stream, and
    ffmpeg wraps the processed stream into ``output_file``.

    The raw stream is forced to 48 kHz mono on both sides of the RNNoise
    call, because that is the layout the demo binary is documented to
    operate on; the resulting ``output_file`` is therefore 48 kHz mono.

    Args:
        input_file: Path of the audio file to process.
        output_file: Path the denoised audio is written to.

    Raises:
        subprocess.CalledProcessError: If ffmpeg or rnnoise_demo exits with a
            non-zero status.
    """
    # Local import: only this function needs a scratch directory.
    import tempfile

    logging.info("RNNOISE: processing started")

    # Private scratch directory instead of fixed /tmp names, so runs cannot
    # overwrite each other's intermediates; it is removed on exit.
    with tempfile.TemporaryDirectory() as scratch_dir:
        noisy_raw_file = os.path.join(scratch_dir, "noisy.raw")
        enhanced_raw_file = os.path.join(scratch_dir, "rnnoise_enhanced.raw")

        # Decode to raw s16le, resampled/downmixed to 48 kHz mono.
        ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file,
                          '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', '48k', '-ac', '1', noisy_raw_file]
        subprocess.run(ffmpeg_command, check=True)

        rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', noisy_raw_file, enhanced_raw_file]
        subprocess.run(rnnoise_command, check=True)

        # Raw PCM has no header, so describe the stream with the same layout
        # that was used when producing it.
        ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '1',
                          '-i', enhanced_raw_file, output_file]
        subprocess.run(ffmpeg_command, check=True)

    logging.info("RNNOISE: processing finished")
|
||||
|
||||
|
||||
# ESPNET (https://github.com/espnet/espnet)
# Disabled: everything below sits inside a bare string literal, so none of it
# is executed.
# NOTE(review): the snippet refers to `np`, which this file does not import,
# and to `soundfile` / `ModelDownloader` / `SeparateSpeech`, whose imports are
# likewise disabled — these would need restoring before re-enabling it.
"""
logging.info("ESPNET: processing started")
input_file = "/app/audio/" + file_name
output_file = "/app/audio/modded_" + file_name

ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-ar', '16k', '-ac', '2', '-acodec', 'pcm_s16le', output_file]
subprocess.run(ffmpeg_command)

def enhance_speech(input_file, output_file):
    # Load the input audio file
    mixwav_mc, sr = soundfile.read(input_file)

    np.set_printoptions(threshold=sys.maxsize)
    # logging.debug(arr)
    logging.debug("mixwav_mc")
    logging.debug(mixwav_mc)
    logging.debug("mixwav_mc[1]")
    logging.debug(mixwav_mc[1])

    # mixwav_mc = mixwav_mc[None, :]

    # Download the ESPnet model for speech enhancement
    model_tag = "espnet/Wangyou_Zhang_chime4_enh_train_enh_beamformer_mvdr_raw"
    model_downloader = ModelDownloader()
    model_config = model_downloader.download_and_unpack(model_tag)

    # Initialize the speech enhancement model
    enh_model_mc = SeparateSpeech(
        train_config=model_config["train_config"],
        model_file=model_config["model_file"],
        normalize_segment_scale=False,
        show_progressbar=True,
        ref_channel=4,
        normalize_output_wav=True,
        device="cpu",
    )

    # Perform speech enhancement
    enhanced_waveform = enh_model_mc(mixwav_mc, sr)

    # Save the enhanced waveform to a new audio file
    soundfile.write(output_file, enhanced_waveform, sr)
"""
|
||||
|
||||
|
||||
# PTA (https://pytorch.org/audio/stable/index.html)
# Disabled: everything below sits inside a bare string literal, so none of it
# is executed.
# NOTE(review): apart from logging the torch/torchaudio versions, the snippet
# runs the same ffmpeg -> rnnoise_demo -> ffmpeg steps as the RNNoise path and
# then copies the result to an SMB share; it also relies on
# `get_title_from_dbx_path` and `output_audio_files`, which are not defined in
# this file. Confirm intent before re-enabling.
# NOTE(review): written as a normal (non-raw) string, the share path would
# start with a single backslash — verify the UNC prefix.
"""
logging.info("PTA: processing started")
rnnoise_out_filename = "[PTA]" + get_title_from_dbx_path("/app/audio/" + file_name.lower().replace(".wav", "").upper() + ".DBX") + "_-_" + file_name

logging.debug("Torch Version:" + torch.__version__)
logging.info("Torch (Audio) Version: " + torchaudio.__version__)

if rnnoise_out_filename not in output_audio_files:
    input_file = "/app/audio/" + file_name
    output_raw_file = "/app/audio/modded_file.raw"
    output_wav_file = "/app/audio/modded_file.wav"

    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', output_raw_file]
    subprocess.run(ffmpeg_command)

    rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', output_raw_file, "/app/audio/rnnoise_enhanced.raw"]
    subprocess.run(rnnoise_command)

    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', '/app/audio/rnnoise_enhanced.raw', output_wav_file]
    subprocess.run(ffmpeg_command)

    with open(output_wav_file, 'rb') as f:
        tmpFile = f.read()
        with smbclient.open_file("\\transfer-srv.office.radioteleffh.de\\transfer\\Technik\\sound_processing\\" + rnnoise_out_filename, mode="wb") as fd:
            fd.write(tmpFile)
    logging.info("PTA: processing finished")
else:
    logging.info("PTA: File was already processed in an earlier iteration.")
"""
|
||||
|
||||
|
||||
# Driver: run the tool(s) selected by env_var_processing_tools on every
# non-hidden file in /app/input_files/ and write results to /app/output_files/.
logging.debug("Files in /app/input_files/:")
logging.debug(os.listdir("/app/input_files/"))

for file_name in listdir_nohidden("/app/input_files/"):
    # One failing file must not abort the whole batch: log the error and
    # continue with the next file.
    try:
        logging.info("########################\t NEW FILE\t########################")
        logging.info("Filename: " + file_name)

        input_file = "/app/input_files/" + file_name
        output_stem = Path(input_file).stem

        logging.debug("env_var_processing_tools: " + env_var_processing_tools)

        # "DFN3" is the tool's actual name; the misspelled "DNF3" was the only
        # value accepted before and stays valid for existing deployments.
        if env_var_processing_tools in ("DFN3", "DNF3"):
            run_dfn3(input_file, "/app/output_files/" + output_stem + ".wav")
        elif env_var_processing_tools == "RNNOISE":
            run_rrnoise(input_file, "/app/output_files/" + output_stem + ".wav")
        elif env_var_processing_tools == "ALL":
            # Prefix the outputs so both results can coexist for one input.
            run_dfn3(input_file, "/app/output_files/[DFN3]" + output_stem + ".wav")
            run_rrnoise(input_file, "/app/output_files/[RNNOISE]" + output_stem + ".wav")
        else:
            logging.warning("The given processing tool -->" + env_var_processing_tools + "<-- was not found, please try again with a valid tool.")

    except Exception as e:
        logging.error("There was an error: " + str(e))
        logging.error("Stacktrace: " + str(traceback.format_exc()))
|
||||
Loading…
Add table
Add a link
Reference in a new issue