generalize logging
This commit is contained in:
parent
a47fe71bce
commit
e737c3895c
2 changed files with 10 additions and 19 deletions
|
|
@ -35,7 +35,7 @@ def create_logger(log_level: str, logger_name: str = "custom_logger"):
|
|||
|
||||
# Create a formatter and set it for the console handler
|
||||
formatter = logging.Formatter(
|
||||
"%(asctime)s - %(levelname)s [%(name)s] - %(message)s",
|
||||
"%(asctime)s [%(name)s] | %(levelname)s\t - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
console_handler.setFormatter(formatter)
|
||||
|
|
|
|||
|
|
@ -1,26 +1,17 @@
|
|||
"""Module for tools to process PDF documents"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import io
|
||||
import PyPDF2
|
||||
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
import logging
|
||||
import datetime as dt
|
||||
from datetime import datetime
|
||||
import traceback
|
||||
from common_packages import logging
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
# level=logging.INFO,
|
||||
format="Start: " + str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler("/<path>-_" + str(datetime.today().strftime('%Y-%m-%d')) + "_-_debug.log"),
|
||||
logging.StreamHandler(sys.stdout)
|
||||
]
|
||||
# instantiate logger
|
||||
logger = logging.create_logger(
|
||||
log_level=os.getenv("LOGGING_LEVEL", "INFO"),
|
||||
logger_name=__name__,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -73,11 +64,11 @@ def read_pdf(pdf_bytes: io.BytesIO) -> tuple:
|
|||
Returns:
|
||||
tuple of lists: (List of chunked text, List of corresponding page numbers).
|
||||
"""
|
||||
logging.info("Reading PDF document")
|
||||
logger.info("Reading PDF document")
|
||||
pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
|
||||
|
||||
num_pages = len(pdf_reader.pages)
|
||||
logging.info("Read PDF document with '%s' pages", num_pages)
|
||||
logger.info("Read PDF document with '%s' pages", num_pages)
|
||||
|
||||
text_pages = []
|
||||
for i in range(num_pages):
|
||||
|
|
@ -86,12 +77,12 @@ def read_pdf(pdf_bytes: io.BytesIO) -> tuple:
|
|||
if text:
|
||||
text_pages.append((text, i + 1))
|
||||
|
||||
logging.info("Processing PDF content")
|
||||
logger.info("Processing PDF content")
|
||||
pdf_processor = PDFProcessor()
|
||||
processed_chunks = pdf_processor.chunk_text(text_pages)
|
||||
|
||||
chunks = [chunk for chunk, _ in processed_chunks]
|
||||
pages = [page for _, page in processed_chunks]
|
||||
logging.info("PDF processed. Number of chunks: %s", len(chunks))
|
||||
logger.info("PDF processed. Number of chunks: %s", len(chunks))
|
||||
|
||||
return chunks, pages
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue