INIT
commit bf12747492
7 changed files with 290 additions and 0 deletions

Dockerfile (new file, 40 lines)
@@ -0,0 +1,40 @@
################################################################
#
# Project: Sound Processing Functions
# Created by: Niklas Müller
# Created at: 2023.12.21
#
# podman build -t sound-processing-functions .
# podman run -e PROCESSING_TOOL='DNF3' -v '/path/to/audio_video/file/':/app/input_files/ -v /output_path/:/app/output_files/ -d --name sound-processing-functions -t sound-processing-functions
# podman stop sound-processing-functions; podman rm sound-processing-functions; podman build -t sound-processing-functions .; podman run -d --name sound-processing-functions sound-processing-functions
#
################################################################


FROM python:3.11.3-bullseye

# set the working directory
WORKDIR /app
RUN mkdir /app/audio
RUN mkdir /app/rnnoise

# Install Packages
RUN apt-get update
RUN apt-get -y install cmake build-essential ffmpeg git

# RNNOISE: clone and build from source
RUN git clone https://gitlab.xiph.org/xiph/rnnoise.git /app/rnnoise
RUN (cd /app/rnnoise && ./autogen.sh)
RUN (cd /app/rnnoise && ./configure)
RUN (cd /app/rnnoise && make)
RUN (cd /app/rnnoise && make install)

# install dependencies
COPY ./requirements.txt /app
RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --no-cache-dir --upgrade -r requirements.txt

# copy the scripts to the folder
COPY . /app

CMD ["bash", "init.sh"]
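
The header comments show how to build, run, and recreate the container. Since runner.py also logs to stdout, the processing output can be followed with the standard podman logs command (assuming the container name used in the commands above):

```podman logs -f sound-processing-functions```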

README.md (new file, 13 lines)
@@ -0,0 +1,13 @@
# Sound-Processing-Function

Run DNF3 and/or RNNOISE on any audio you like.

Just build it:

```podman build -t sound-processing-functions .```

and run it:

```podman run -e PROCESSING_TOOL='DNF3' -v '/path/to/audio_video/file/':/app/input_files/ -v /output_path/:/app/output_files/ -d --name sound-processing-functions -t sound-processing-functions```

With the environment variable ```PROCESSING_TOOL``` you can select which processing tool should be run (```DNF3```, ```RNNOISE```, or ```ALL```). If you do not supply the variable, both tools will be run and the name of each output file will state which processing tool was used.
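
For example, to run only RNNOISE on the same mounts, set the variable accordingly (these values are the ones checked in runner.py):

```podman run -e PROCESSING_TOOL='RNNOISE' -v '/path/to/audio_video/file/':/app/input_files/ -v /output_path/:/app/output_files/ -d --name sound-processing-functions -t sound-processing-functions```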

clean_audio/.gitkeep (new file, 0 lines)

init.sh (new file, 5 lines)
@@ -0,0 +1,5 @@
#!/bin/bash

env >> /etc/environment

/usr/local/bin/python /app/runner.py

noisy_audio/.gitkeep (new file, 0 lines)

requirements.txt (new file, 32 lines)
@@ -0,0 +1,32 @@
smbprotocol
requests
lxml
ffmpeg-python


# DFN (DeepFilterNet)
deepfilternet
torch
torchaudio -f https://download.pytorch.org/whl/cpu/torch_stable.html


# RNNOISE
# None needed


# PTA
#mir_eval
#pesq
#pystoi


# ESP
#sentencepiece
#git+https://github.com/espnet/espnet
#espnet_model_zoo
# wheel
# https://files.pythonhosted.org/packages/4d/9d/9153942f0e2143a43978bcefba31d79187b7037bed3f85a6668c69493062/sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl


# MISC
#torchaudio

runner.py (new file, 200 lines)
@@ -0,0 +1,200 @@
import smbclient
import logging
import traceback
import json
import os
import sys
from pathlib import Path
from datetime import datetime
from lxml import etree
import subprocess

# DeepFilterNet (DFN3)
from df.enhance import enhance, init_df, load_audio, save_audio

"""
# ESPNet
import soundfile
from espnet_model_zoo.downloader import ModelDownloader
from espnet2.bin.enh_inference import SeparateSpeech
"""

"""
# PyTorch Audio (PTA)
import torch
import torchaudio
import torchaudio.functional as F
from pesq import pesq
from pystoi import stoi
from torchaudio.utils import download_asset
"""

# Setup Logging
logging.basicConfig(
    # level=logging.ERROR,
    # level=logging.INFO,
    level=logging.DEBUG,
    format="%(asctime)s [%(name)s] | %(levelname)s\t| %(message)s",
    handlers=[
        logging.FileHandler("/var/log/" + str(datetime.today().strftime('%Y-%m-%d')) + "_-_cron.log"),
        logging.StreamHandler(sys.stdout)
    ]
)

# Do not show logging for those logging contexts
# print([k for k in logging.Logger.manager.loggerDict])
for v in [k for k in logging.Logger.manager.loggerDict]:
    if v.startswith('smbprotocol') or v.startswith('spnego') or v.startswith('smbclient._io') or v.startswith('urllib3.connectionpool'):
        logging.getLogger(v).disabled = True


if "PROCESSING_TOOL" in os.environ:
    env_var_processing_tools = r'{}'.format(os.environ['PROCESSING_TOOL'])
else:
    env_var_processing_tools = "ALL"


def listdir_nohidden(path):
    for f in os.listdir(path):
        if not f.startswith('.'):
            yield f


# DFN3 (https://github.com/Rikorose/DeepFilterNet)
def run_dfn3(input_file, output_file):
    logging.info("DFN3: processing started")

    # convert the input to a temporary wav file that DeepFilterNet can load
    tmp_file = "/tmp/" + Path(input_file).stem + ".wav"

    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-acodec', 'pcm_u8', tmp_file]
    subprocess.run(ffmpeg_command)

    model, df_state, _ = init_df()  # Load default model
    audio, _ = load_audio(tmp_file, sr=df_state.sr())
    enhanced_audio = enhance(model, df_state, audio)

    save_audio(output_file, enhanced_audio, df_state.sr())

    logging.info("DFN3: processing finished")


# RNNOISE (https://jmvalin.ca/demo/rnnoise/)
def run_rrnoise(input_file, output_file):
    logging.info("RNNOISE: processing started")

    output_raw_file = "/tmp/modded_file.raw"

    # convert to raw 16-bit PCM, denoise with the rnnoise demo binary, then re-encode
    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', output_raw_file]
    subprocess.run(ffmpeg_command)

    rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', output_raw_file, "/tmp/rnnoise_enhanced.raw"]
    subprocess.run(rnnoise_command)

    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', '/tmp/rnnoise_enhanced.raw', output_file]
    subprocess.run(ffmpeg_command)


# ESPNET (https://github.com/espnet/espnet)
"""
logging.info("ESPNET: processing started")
input_file = "/app/audio/" + file_name
output_file = "/app/audio/modded_" + file_name

ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-ar', '16k', '-ac', '2', '-acodec', 'pcm_s16le', output_file]
subprocess.run(ffmpeg_command)

def enhance_speech(input_file, output_file):
    # Load the input audio file
    mixwav_mc, sr = soundfile.read(input_file)

    np.set_printoptions(threshold=sys.maxsize)
    # logging.debug(arr)
    logging.debug("mixwav_mc")
    logging.debug(mixwav_mc)
    logging.debug("mixwav_mc[1]")
    logging.debug(mixwav_mc[1])

    # mixwav_mc = mixwav_mc[None, :]

    # Download the ESPnet model for speech enhancement
    model_tag = "espnet/Wangyou_Zhang_chime4_enh_train_enh_beamformer_mvdr_raw"
    model_downloader = ModelDownloader()
    model_config = model_downloader.download_and_unpack(model_tag)

    # Initialize the speech enhancement model
    enh_model_mc = SeparateSpeech(
        train_config=model_config["train_config"],
        model_file=model_config["model_file"],
        normalize_segment_scale=False,
        show_progressbar=True,
        ref_channel=4,
        normalize_output_wav=True,
        device="cpu",
    )

    # Perform speech enhancement
    enhanced_waveform = enh_model_mc(mixwav_mc, sr)

    # Save the enhanced waveform to a new audio file
    soundfile.write(output_file, enhanced_waveform, sr)
"""


# PTA (https://pytorch.org/audio/stable/index.html)
"""
logging.info("PTA: processing started")
rnnoise_out_filename = "[PTA]" + get_title_from_dbx_path("/app/audio/" + file_name.lower().replace(".wav", "").upper() + ".DBX") + "_-_" + file_name

logging.debug("Torch Version:" + torch.__version__)
logging.info("Torch (Audio) Version: " + torchaudio.__version__)

if rnnoise_out_filename not in output_audio_files:
    input_file = "/app/audio/" + file_name
    output_raw_file = "/app/audio/modded_file.raw"
    output_wav_file = "/app/audio/modded_file.wav"

    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-i', input_file, '-f', 's16le', '-acodec', 'pcm_s16le', output_raw_file]
    subprocess.run(ffmpeg_command)

    rnnoise_command = ['/app/rnnoise/examples/rnnoise_demo', output_raw_file, "/app/audio/rnnoise_enhanced.raw"]
    subprocess.run(rnnoise_command)

    ffmpeg_command = ['ffmpeg', '-loglevel', '16', '-y', '-f', 's16le', '-ar', '48k', '-ac', '2', '-i', '/app/audio/rnnoise_enhanced.raw', output_wav_file]
    subprocess.run(ffmpeg_command)

    with open(output_wav_file, 'rb') as f:
        tmpFile = f.read()
        with smbclient.open_file("\\transfer-srv.office.radioteleffh.de\\transfer\\Technik\\sound_processing\\" + rnnoise_out_filename, mode="wb") as fd:
            fd.write(tmpFile)
    logging.info("PTA: processing finished")
else:
    logging.info("PTA: File was already processed in an earlier iteration.")
"""


logging.debug("Files in /app/input_files/:")
logging.debug(os.listdir("/app/input_files/"))

for file_name in listdir_nohidden("/app/input_files/"):
    try:
        logging.info("########################\t NEW FILE\t########################")
        logging.info("Filename: " + file_name)

        input_file = "/app/input_files/" + file_name

        logging.debug("env_var_processing_tools: " + env_var_processing_tools)

        if env_var_processing_tools == "DNF3":
            run_dfn3(input_file, "/app/output_files/" + Path(input_file).stem + ".wav")
        elif env_var_processing_tools == "RNNOISE":
            run_rrnoise(input_file, "/app/output_files/" + Path(input_file).stem + ".wav")
        elif env_var_processing_tools == "ALL":
            run_dfn3(input_file, "/app/output_files/[DFN3]" + Path(input_file).stem + ".wav")
            run_rrnoise(input_file, "/app/output_files/[RNNOISE]" + Path(input_file).stem + ".wav")
        else:
            logging.warning("The given processing tool -->" + env_var_processing_tools + "<-- was not found, please try again with a valid tool.")

    except Exception as e:
        logging.error("There was an error: " + str(e))
        logging.error("Stacktrace: " + str(traceback.format_exc()))
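
The if/elif dispatch at the end of runner.py could equally be written as a lookup table. A minimal, hypothetical sketch reusing only the names defined above (unlike the original, it silently skips unrecognised values instead of logging a warning):

```
# Hypothetical alternative to the if/elif chain above: map each accepted
# PROCESSING_TOOL value to the (output prefix, function) pairs it should run.
TOOL_MAP = {
    "DNF3": [("", run_dfn3)],
    "RNNOISE": [("", run_rrnoise)],
    "ALL": [("[DFN3]", run_dfn3), ("[RNNOISE]", run_rrnoise)],
}

for file_name in listdir_nohidden("/app/input_files/"):
    input_file = "/app/input_files/" + file_name
    # prefixes are only non-empty for ALL, mirroring the current output naming
    for prefix, tool in TOOL_MAP.get(env_var_processing_tools, []):
        tool(input_file, "/app/output_files/" + prefix + Path(input_file).stem + ".wav")
```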