Use Python's standard logging framework for app logs

- Stop passing verbose flag around app methods
- Minor remap of verbosity levels to match the Python logging framework's levels:
  - verbose = 0 maps to logging.WARN
  - verbose = 1 maps to logging.INFO
  - verbose >= 2 maps to logging.DEBUG
- Minor clean-up of app: remove unused modules, tidy up conversation file opening
This commit is contained in:
Debanjum Singh Solanky
2022-09-03 14:43:32 +03:00
parent d0531c3064
commit 094bd18e57
10 changed files with 184 additions and 155 deletions

View File

@@ -6,6 +6,7 @@ import argparse
import pathlib
import glob
import re
import logging
# Internal Packages
from src.utils.helpers import get_absolute_path, is_none_or_empty
@@ -13,32 +14,35 @@ from src.utils.constants import empty_escape_sequences
from src.utils.jsonl import dump_jsonl, compress_jsonl_data
logger = logging.getLogger(__name__)
# Define Functions
def markdown_to_jsonl(markdown_files, markdown_file_filter, output_file, verbose=0):
def markdown_to_jsonl(markdown_files, markdown_file_filter, output_file):
# Input Validation
if is_none_or_empty(markdown_files) and is_none_or_empty(markdown_file_filter):
print("At least one of markdown-files or markdown-file-filter is required to be specified")
exit(1)
# Get Markdown Files to Process
markdown_files = get_markdown_files(markdown_files, markdown_file_filter, verbose)
markdown_files = get_markdown_files(markdown_files, markdown_file_filter)
# Extract Entries from specified Markdown files
entries = extract_markdown_entries(markdown_files)
# Process Each Entry from All Notes Files
jsonl_data = convert_markdown_entries_to_jsonl(entries, verbose=verbose)
jsonl_data = convert_markdown_entries_to_jsonl(entries)
# Compress JSONL formatted Data
if output_file.suffix == ".gz":
compress_jsonl_data(jsonl_data, output_file, verbose=verbose)
compress_jsonl_data(jsonl_data, output_file)
elif output_file.suffix == ".jsonl":
dump_jsonl(jsonl_data, output_file, verbose=verbose)
dump_jsonl(jsonl_data, output_file)
return entries
def get_markdown_files(markdown_files=None, markdown_file_filter=None, verbose=0):
def get_markdown_files(markdown_files=None, markdown_file_filter=None):
"Get Markdown files to process"
absolute_markdown_files, filtered_markdown_files = set(), set()
if markdown_files:
@@ -56,10 +60,9 @@ def get_markdown_files(markdown_files=None, markdown_file_filter=None, verbose=0
}
if any(files_with_non_markdown_extensions):
print(f"[Warning] There maybe non markdown-mode files in the input set: {files_with_non_markdown_extensions}")
logger.warn(f"[Warning] There maybe non markdown-mode files in the input set: {files_with_non_markdown_extensions}")
if verbose > 0:
print(f'Processing files: {all_markdown_files}')
logger.info(f'Processing files: {all_markdown_files}')
return all_markdown_files
@@ -81,7 +84,7 @@ def extract_markdown_entries(markdown_files):
return entries
def convert_markdown_entries_to_jsonl(entries, verbose=0):
def convert_markdown_entries_to_jsonl(entries):
"Convert each Markdown entries to JSON and collate as JSONL"
jsonl = ''
for entry in entries:
@@ -89,8 +92,7 @@ def convert_markdown_entries_to_jsonl(entries, verbose=0):
# Convert Dictionary to JSON and Append to JSONL string
jsonl += f'{json.dumps(entry_dict, ensure_ascii=False)}\n'
if verbose > 0:
print(f"Converted {len(entries)} to jsonl format")
logger.info(f"Converted {len(entries)} to jsonl format")
return jsonl