This commit is contained in:
Niklas Müller 2024-12-01 18:12:39 +01:00
commit bf12747492
7 changed files with 290 additions and 0 deletions

40
Dockerfile Normal file
View file

@ -0,0 +1,40 @@
################################################################
#
# Project: Sound Processing Functions
# Created by: Niklas Müller
# Created at: 2023.12.21
#
# podman build -t sound-processing-functions .
# podman run -e PROCESSING_TOOL='DNF3' -v '/path/to/audio_video/file/':/app/input_files/ -v /output_path/:/app/output_files/ -d --name sound-processing-functions -t sound-processing-functions
# podman stop sound-processing-functions; podman rm sound-processing-functions; podman build -t sound-processing-functions .; podman run -d --name sound-processing-functions sound-processing-functions
#
################################################################
FROM python:3.11.3-bullseye

# All application files live under /app (WORKDIR creates it if it is missing).
WORKDIR /app
RUN mkdir -p /app/audio

# Install OS packages.
# `update` and `install` share one layer so a cached, stale package index can
# never be combined with a fresh install step; the index is deleted again in
# the same layer so it does not end up in the image.
RUN apt-get update \
    && apt-get -y install --no-install-recommends \
        build-essential \
        cmake \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

# RNNOISE
# Clone into an explicit target directory and build there. Every RUN starts a
# new shell, so a stand-alone `RUN cd ...` has no effect on later instructions;
# WORKDIR is what actually switches the build directory.
# runner.py calls the demo binary at /app/rnnoise/examples/rnnoise_demo.
RUN git clone https://gitlab.xiph.org/xiph/rnnoise.git /app/rnnoise
WORKDIR /app/rnnoise
RUN ./autogen.sh \
    && ./configure \
    && make \
    && make install
WORKDIR /app

# Install Python dependencies before copying the sources, so this layer stays
# cached as long as requirements.txt does not change.
COPY ./requirements.txt /app/requirements.txt
RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --no-cache-dir --upgrade -r requirements.txt

# copy the scripts to the folder
COPY . /app

CMD ["bash", "init.sh"]

13
README.md Normal file
View file

@ -0,0 +1,13 @@
# Sound-Processing-Function
Run DNF3 and/or RNNOISE on any audio you like.
Just build it:
```podman build -t sound-processing-functions .```
and run it
```podman run -e PROCESSING_TOOL='DNF3' -v '/path/to/audio_video/file/':/app/input_files/ -v /output_path/:/app/output_files/ -d --name sound-processing-functions -t sound-processing-functions```
With the environment variable ```PROCESSING_TOOL``` (`DNF3` or `RNNOISE`) you can select which processing should be run. If you do not supply the variable, both will be run and the name of each output file will mention the processing that was used.

0
clean_audio/.gitkeep Normal file
View file

5
init.sh Normal file
View file

@ -0,0 +1,5 @@
#!/bin/bash
# Container start script: stores the environment, then runs the processing.

# Append the container's environment variables to /etc/environment.
# NOTE(review): presumably so that processes which read /etc/environment
# (e.g. cron jobs) see variables such as PROCESSING_TOOL - confirm.
env >> /etc/environment

# `exec` replaces this shell with the Python process, so signals sent to the
# container (e.g. SIGTERM on `podman stop`) reach runner.py directly and its
# exit status becomes the container's exit status.
exec /usr/local/bin/python /app/runner.py

0
noisy_audio/.gitkeep Normal file
View file

32
requirements.txt Normal file
View file

@ -0,0 +1,32 @@
smbprotocol
requests
lxml
ffmpeg-python
# DFN (DeepFilterNet)
deepfilternet
torch
torchaudio -f https://download.pytorch.org/whl/cpu/torch_stable.html
# RNNOISE
# No Python packages needed (RNNoise is built from source in the Dockerfile and called as an external binary)
# PTA
#mir_eval
#pesq
#pystoi
# ESP
#sentencepiece
#git+https://github.com/espnet/espnet
#espnet_model_zoo
# wheel
# https://files.pythonhosted.org/packages/4d/9d/9153942f0e2143a43978bcefba31d79187b7037bed3f85a6668c69493062/sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
# MISC
#torchaudio

200
runner.py Normal file
View file

@ -0,0 +1,200 @@
import smbclient
import logging
import traceback
import json
import os
import sys
from pathlib import Path
from datetime import datetime
from lxml import etree
import subprocess
# DeepFilterNet (DFN3)
from df.enhance import enhance, init_df, load_audio, save_audio
"""
# ESPNet
import soundfile
from espnet_model_zoo.downloader import ModelDownloader
from espnet2.bin.enh_inference import SeparateSpeech
"""
"""
# PyTorch Audio (PTA)
import torch
import torchaudio
import torchaudio.functional as F
from pesq import pesq
from pystoi import stoi
from torchaudio.utils import download_asset
"""
# Logging setup: every record is written to a per-day log file under /var/log/
# and mirrored to stdout.
logging.basicConfig(
    level=logging.DEBUG,  # other levels used during development: logging.INFO, logging.ERROR
    format="%(asctime)s [%(name)s] | %(levelname)s\t| %(message)s",
    handlers=[
        logging.FileHandler(f"/var/log/{datetime.today():%Y-%m-%d}_-_cron.log"),
        logging.StreamHandler(sys.stdout),
    ],
)

# Silence the loggers of the SMB/HTTP libraries. Only loggers that are already
# registered at this point are affected.
# (To inspect the registered names: print(list(logging.Logger.manager.loggerDict)))
for logger_name in list(logging.Logger.manager.loggerDict):
    if logger_name.startswith(('smbprotocol', 'spnego', 'smbclient._io', 'urllib3.connectionpool')):
        logging.getLogger(logger_name).disabled = True

# Which processing to run; falls back to "ALL" when PROCESSING_TOOL is not set.
env_var_processing_tools = os.environ.get("PROCESSING_TOOL", "ALL")
def listdir_nohidden(path):
    """Yield the name of every entry in `path` that does not start with a dot.

    The names come from os.listdir(), i.e. they are bare entry names (not full
    paths) in whatever order the operating system reports them.
    """
    yield from filter(lambda entry: not entry.startswith('.'), os.listdir(path))
# DFN3 (https://github.com/Rikorose/DeepFilterNet)
def run_dfn3(input_file, output_file):
    """Denoise `input_file` with DeepFilterNet and write the result to `output_file`.

    The input is first transcoded by ffmpeg into a temporary WAV file under
    /tmp/ (named after the input's stem). That file is loaded at the model's
    sample rate, enhanced with the default DeepFilterNet model and saved.
    The temporary file is removed afterwards, whether or not processing
    succeeded.

    Args:
        input_file: Path of the audio/video file to process.
        output_file: Path the enhanced audio is written to.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    logging.info("DFN3: processing started")
    # Use the path the caller passed in; do not depend on a module-level
    # `file_name` variable happening to exist.
    tmp_file = "/tmp/" + Path(input_file).stem + ".wav"
    # NOTE(review): pcm_u8 stores 8-bit samples; presumably a higher bit depth
    # (e.g. pcm_s16le) would hand the model a cleaner signal - confirm before changing.
    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-acodec', 'pcm_u8', tmp_file]
    try:
        # check=True: fail here with a clear error instead of continuing with a
        # missing or stale temporary file.
        subprocess.run(ffmpeg_command, check=True)
        model, df_state, _ = init_df()  # Load default model
        audio, _ = load_audio(tmp_file, sr=df_state.sr())
        enhanced_audio = enhance(model, df_state, audio)
        save_audio(output_file, enhanced_audio, df_state.sr())
    finally:
        # Do not let intermediate files pile up in /tmp.
        Path(tmp_file).unlink(missing_ok=True)
    logging.info("DFN3: processing finished")
# RNNOISE (https://jmvalin.ca/demo/rnnoise/)
def run_rrnoise(input_file, output_file):
    """Denoise `input_file` with RNNoise and write the result to `output_file`.

    Steps:
      1. ffmpeg converts the input to headerless 16-bit PCM, 48 kHz, mono.
      2. /app/rnnoise/examples/rnnoise_demo denoises that raw file.
      3. ffmpeg wraps the denoised raw data into `output_file`, declaring it
         as 48 kHz mono and writing two output channels.

    The intermediate raw files in /tmp/ are removed afterwards, whether or not
    processing succeeded.

    Args:
        input_file: Path of the audio/video file to process.
        output_file: Path the enhanced audio is written to.

    Raises:
        subprocess.CalledProcessError: If ffmpeg or rnnoise_demo exits with a
            non-zero status.
    """
    logging.info("RNNOISE: processing started")
    noisy_raw_file = "/tmp/modded_file.raw"
    denoised_raw_file = "/tmp/rnnoise_enhanced.raw"
    try:
        # rnnoise_demo works on raw 16-bit mono PCM sampled at 48 kHz, so the
        # sample rate and channel count are forced here instead of being
        # inherited from the input file.
        ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', '48k', '-ac', '1', noisy_raw_file]
        subprocess.run(ffmpeg_command, check=True)
        rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', noisy_raw_file, denoised_raw_file]
        subprocess.run(rnnoise_command, check=True)
        # The raw data is mono (options before -i describe the input); the
        # trailing '-ac 2' keeps the output file two-channel as before.
        ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '1', '-i', denoised_raw_file, '-ac', '2', output_file]
        subprocess.run(ffmpeg_command, check=True)
    finally:
        # Do not let intermediate files pile up in /tmp.
        for leftover in (noisy_raw_file, denoised_raw_file):
            Path(leftover).unlink(missing_ok=True)
    logging.info("RNNOISE: processing finished")
# ESPNET (https://github.com/espnet/espnet)
"""
logging.info("ESPNET: processing started")
input_file = "/app/audio/" + file_name
output_file = "/app/audio/modded_" + file_name
ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-ar', '16k', '-ac', '2', '-acodec', 'pcm_s16le', output_file]
subprocess.run(ffmpeg_command)
def enhance_speech(input_file, output_file):
# Load the input audio file
mixwav_mc, sr = soundfile.read(input_file)
np.set_printoptions(threshold=sys.maxsize)
# logging.debug(arr)
logging.debug("mixwav_mc")
logging.debug(mixwav_mc)
logging.debug("mixwav_mc[1]")
logging.debug(mixwav_mc[1])
# mixwav_mc = mixwav_mc[None, :]
# Download the ESPnet model for speech enhancement
model_tag = "espnet/Wangyou_Zhang_chime4_enh_train_enh_beamformer_mvdr_raw"
model_downloader = ModelDownloader()
model_config = model_downloader.download_and_unpack(model_tag)
# Initialize the speech enhancement model
enh_model_mc = SeparateSpeech(
train_config=model_config["train_config"],
model_file=model_config["model_file"],
normalize_segment_scale=False,
show_progressbar=True,
ref_channel=4,
normalize_output_wav=True,
device="cpu",
)
# Perform speech enhancement
enhanced_waveform = enh_model_mc(mixwav_mc, sr)
# Save the enhanced waveform to a new audio file
soundfile.write(output_file, enhanced_waveform, sr)
"""
# PTA (https://pytorch.org/audio/stable/index.html)
"""
logging.info("PTA: processing started")
rnnoise_out_filename = "[PTA]" + get_title_from_dbx_path("/app/audio/" + file_name.lower().replace(".wav", "").upper() + ".DBX") + "_-_" + file_name
logging.debug("Torch Version:" + torch.__version__)
logging.info("Torch (Audio) Version: " + torchaudio.__version__)
if rnnoise_out_filename not in output_audio_files:
input_file = "/app/audio/" + file_name
output_raw_file = "/app/audio/modded_file.raw"
output_wav_file = "/app/audio/modded_file.wav"
ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', output_raw_file]
subprocess.run(ffmpeg_command)
rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', output_raw_file, "/app/audio/rnnoise_enhanced.raw"]
subprocess.run(rnnoise_command)
ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', '/app/audio/rnnoise_enhanced.raw', output_wav_file]
subprocess.run(ffmpeg_command)
with open(output_wav_file, 'rb') as f:
tmpFile = f.read()
with smbclient.open_file("\\transfer-srv.office.radioteleffh.de\\transfer\\Technik\\sound_processing\\" + rnnoise_out_filename, mode="wb") as fd:
fd.write(tmpFile)
logging.info("PTA: processing finished")
else:
logging.info("PTA: File was already processed in an earlier iteration.")
"""
# Main loop: run the selected processing on every non-hidden file in
# /app/input_files/ and write the results to /app/output_files/.
logging.debug("Files in /app/input_files/:")
logging.debug(os.listdir("/app/input_files/"))

# "DNF3" is the spelling this script has always accepted; the tool's actual
# abbreviation "DFN3" is accepted as well. Matching ignores letter case and
# surrounding whitespace.
selected_tool = env_var_processing_tools.strip().upper()

for file_name in listdir_nohidden("/app/input_files/"):
    # A failure on one file is logged and must not stop the remaining files.
    try:
        logging.info("########################\t NEW FILE\t########################")
        logging.info("Filename: " + file_name)
        input_file = "/app/input_files/" + file_name
        output_stem = Path(input_file).stem
        logging.debug("env_var_processing_tools: " + env_var_processing_tools)
        if selected_tool in ("DNF3", "DFN3"):
            run_dfn3(input_file, "/app/output_files/" + output_stem + ".wav")
        elif selected_tool == "RNNOISE":
            run_rrnoise(input_file, "/app/output_files/" + output_stem + ".wav")
        elif selected_tool == "ALL":
            # Both tools run on the same input; the prefix keeps their outputs apart.
            run_dfn3(input_file, "/app/output_files/[DFN3]" + output_stem + ".wav")
            run_rrnoise(input_file, "/app/output_files/[RNNOISE]" + output_stem + ".wav")
        else:
            logging.warning("The given processing tool -->" + env_var_processing_tools + "<-- was not found, please try again with a valid tool.")
    except Exception as e:
        logging.error("There was an error: " + str(e))
        logging.error("Stacktrace: " + str(traceback.format_exc()))