generalize logging
This commit is contained in:
parent
a47fe71bce
commit
e737c3895c
2 changed files with 10 additions and 19 deletions
|
|
@@ -35,7 +35,7 @@ def create_logger(log_level: str, logger_name: str = "custom_logger"):
|
||||||
|
|
||||||
# Create a formatter and set it for the console handler
|
# Create a formatter and set it for the console handler
|
||||||
formatter = logging.Formatter(
|
formatter = logging.Formatter(
|
||||||
"%(asctime)s - %(levelname)s [%(name)s] - %(message)s",
|
"%(asctime)s [%(name)s] | %(levelname)s\t - %(message)s",
|
||||||
datefmt="%Y-%m-%d %H:%M:%S",
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
)
|
)
|
||||||
console_handler.setFormatter(formatter)
|
console_handler.setFormatter(formatter)
|
||||||
|
|
|
||||||
|
|
@@ -1,26 +1,17 @@
|
||||||
"""Module for tools to process PDF documents"""
|
"""Module for tools to process PDF documents"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import io
|
import io
|
||||||
import PyPDF2
|
import PyPDF2
|
||||||
|
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
|
|
||||||
import logging
|
from common_packages import logging
|
||||||
import datetime as dt
|
|
||||||
from datetime import datetime
|
|
||||||
import traceback
|
|
||||||
|
|
||||||
# Setup Logging
|
# instantiate logger
|
||||||
logging.basicConfig(
|
logger = logging.create_logger(
|
||||||
level=logging.DEBUG,
|
log_level=os.getenv("LOGGING_LEVEL", "INFO"),
|
||||||
# level=logging.INFO,
|
logger_name=__name__,
|
||||||
format="Start: " + str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s",
|
|
||||||
handlers=[
|
|
||||||
logging.FileHandler("/<path>-_" + str(datetime.today().strftime('%Y-%m-%d')) + "_-_debug.log"),
|
|
||||||
logging.StreamHandler(sys.stdout)
|
|
||||||
]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -73,11 +64,11 @@ def read_pdf(pdf_bytes: io.BytesIO) -> tuple:
|
||||||
Returns:
|
Returns:
|
||||||
tuple of lists: (List of chunked text, List of corresponding page numbers).
|
tuple of lists: (List of chunked text, List of corresponding page numbers).
|
||||||
"""
|
"""
|
||||||
logging.info("Reading PDF document")
|
logger.info("Reading PDF document")
|
||||||
pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
|
pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
|
||||||
|
|
||||||
num_pages = len(pdf_reader.pages)
|
num_pages = len(pdf_reader.pages)
|
||||||
logging.info("Read PDF document with '%s' pages", num_pages)
|
logger.info("Read PDF document with '%s' pages", num_pages)
|
||||||
|
|
||||||
text_pages = []
|
text_pages = []
|
||||||
for i in range(num_pages):
|
for i in range(num_pages):
|
||||||
|
|
@@ -86,12 +77,12 @@ def read_pdf(pdf_bytes: io.BytesIO) -> tuple:
|
||||||
if text:
|
if text:
|
||||||
text_pages.append((text, i + 1))
|
text_pages.append((text, i + 1))
|
||||||
|
|
||||||
logging.info("Processing PDF content")
|
logger.info("Processing PDF content")
|
||||||
pdf_processor = PDFProcessor()
|
pdf_processor = PDFProcessor()
|
||||||
processed_chunks = pdf_processor.chunk_text(text_pages)
|
processed_chunks = pdf_processor.chunk_text(text_pages)
|
||||||
|
|
||||||
chunks = [chunk for chunk, _ in processed_chunks]
|
chunks = [chunk for chunk, _ in processed_chunks]
|
||||||
pages = [page for _, page in processed_chunks]
|
pages = [page for _, page in processed_chunks]
|
||||||
logging.info("PDF processed. Number of chunks: %s", len(chunks))
|
logger.info("PDF processed. Number of chunks: %s", len(chunks))
|
||||||
|
|
||||||
return chunks, pages
|
return chunks, pages
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue