generalize logging

2024-07-03 10:14:23 +02:00 · 2024-07-03 10:14:23 +02:00 · e737c3895c
commit e737c3895c
parent a47fe71bce
2 changed files with 10 additions and 19 deletions
--- a/rag-chat-backend/src/common_packages/logging.py
+++ b/rag-chat-backend/src/common_packages/logging.py
@@ -35,7 +35,7 @@ def create_logger(log_level: str, logger_name: str = "custom_logger"):
    # Create a formatter and set it for the console handler
    formatter = logging.Formatter(
-        "%(asctime)s - %(levelname)s [%(name)s] - %(message)s",
+        "%(asctime)s [%(name)s] | %(levelname)s\t - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    console_handler.setFormatter(formatter)
--- a/rag-chat-backend/src/preprocessing/pdf.py
+++ b/rag-chat-backend/src/preprocessing/pdf.py
@@ -1,26 +1,17 @@
 """Module for tools to process PDF documents"""
 import os
 import sys
 import io
 import PyPDF2
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-import logging
+from common_packages import logging
 import datetime as dt
 from datetime import datetime
 import traceback
-# Setup Logging
+# instantiate logger
-logging.basicConfig(
+logger = logging.create_logger(
-    level=logging.DEBUG,
+    log_level=os.getenv("LOGGING_LEVEL", "INFO"),
-    # level=logging.INFO,
+    logger_name=__name__,
    format="Start: " + str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("/<path>-_" + str(datetime.today().strftime('%Y-%m-%d')) + "_-_debug.log"),
        logging.StreamHandler(sys.stdout)
    ]
 )
@@ -73,11 +64,11 @@ def read_pdf(pdf_bytes: io.BytesIO) -> tuple:
    Returns:
        tuple of lists: (List of chunked text, List of corresponding page numbers).
    """
-    logging.info("Reading PDF document")
+    logger.info("Reading PDF document")
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    num_pages = len(pdf_reader.pages)
-    logging.info("Read PDF document with '%s' pages", num_pages)
+    logger.info("Read PDF document with '%s' pages", num_pages)
    text_pages = []
    for i in range(num_pages):
@@ -86,12 +77,12 @@ def read_pdf(pdf_bytes: io.BytesIO) -> tuple:
        if text:
            text_pages.append((text, i + 1))
-    logging.info("Processing PDF content")
+    logger.info("Processing PDF content")
    pdf_processor = PDFProcessor()
    processed_chunks = pdf_processor.chunk_text(text_pages)
    chunks = [chunk for chunk, _ in processed_chunks]
    pages = [page for _, page in processed_chunks]
-    logging.info("PDF processed. Number of chunks: %s", len(chunks))
+    logger.info("PDF processed. Number of chunks: %s", len(chunks))
    return chunks, pages