From e737c3895c3c6b1ec60e67b3b391134ecce56c3e Mon Sep 17 00:00:00 2001 From: Niklas Mueller Date: Wed, 3 Jul 2024 10:14:23 +0200 Subject: [PATCH] generalize logging --- .../src/common_packages/logging.py | 2 +- rag-chat-backend/src/preprocessing/pdf.py | 27 +++++++------------ 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/rag-chat-backend/src/common_packages/logging.py b/rag-chat-backend/src/common_packages/logging.py index 5b3e870..2cdc2f8 100644 --- a/rag-chat-backend/src/common_packages/logging.py +++ b/rag-chat-backend/src/common_packages/logging.py @@ -35,7 +35,7 @@ def create_logger(log_level: str, logger_name: str = "custom_logger"): # Create a formatter and set it for the console handler formatter = logging.Formatter( - "%(asctime)s - %(levelname)s [%(name)s] - %(message)s", + "%(asctime)s [%(name)s] | %(levelname)s\t - %(message)s", datefmt="%Y-%m-%d %H:%M:%S", ) console_handler.setFormatter(formatter) diff --git a/rag-chat-backend/src/preprocessing/pdf.py b/rag-chat-backend/src/preprocessing/pdf.py index 4a38df1..75ac3ba 100644 --- a/rag-chat-backend/src/preprocessing/pdf.py +++ b/rag-chat-backend/src/preprocessing/pdf.py @@ -1,26 +1,17 @@ """Module for tools to process PDF documents""" import os -import sys import io import PyPDF2 from langchain.text_splitter import RecursiveCharacterTextSplitter -import logging -import datetime as dt -from datetime import datetime -import traceback +from common_packages import logging -# Setup Logging -logging.basicConfig( - level=logging.DEBUG, - # level=logging.INFO, - format="Start: " + str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s", - handlers=[ - logging.FileHandler("/-_" + str(datetime.today().strftime('%Y-%m-%d')) + "_-_debug.log"), - logging.StreamHandler(sys.stdout) - ] +# instantiate logger +logger = logging.create_logger( + log_level=os.getenv("LOGGING_LEVEL", "INFO"), + logger_name=__name__, ) @@ -73,11 +64,11 @@ def read_pdf(pdf_bytes: io.BytesIO) -> tuple: Returns: tuple of lists: (List of chunked text, List of corresponding page numbers). """ - logging.info("Reading PDF document") + logger.info("Reading PDF document") pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes)) num_pages = len(pdf_reader.pages) - logging.info("Read PDF document with '%s' pages", num_pages) + logger.info("Read PDF document with '%s' pages", num_pages) text_pages = [] for i in range(num_pages): @@ -86,12 +77,12 @@ def read_pdf(pdf_bytes: io.BytesIO) -> tuple: if text: text_pages.append((text, i + 1)) - logging.info("Processing PDF content") + logger.info("Processing PDF content") pdf_processor = PDFProcessor() processed_chunks = pdf_processor.chunk_text(text_pages) chunks = [chunk for chunk, _ in processed_chunks] pages = [page for _, page in processed_chunks] - logging.info("PDF processed. Number of chunks: %s", len(chunks)) + logger.info("PDF processed. Number of chunks: %s", len(chunks)) return chunks, pages