#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Main command line run control tool for CLEV2ER algorithm framework chains

    Setup requires:
        
        Set CLEV2ER_BASE_DIR to point to the base directory of the CLEV2ER framework
            export CLEV2ER_BASE_DIR=/Users/alanmuir/software/clev2er

        PYTHONPATH to include $CLEV2ER_BASE_DIR/src
            export PYTHONPATH=$PYTHONPATH:$CLEV2ER_BASE_DIR/src

    Example usage:
        To list all command line options:

        `python run_chain.py -h`

        a) Run the cryotempo land ice chain on a single L1b file:

        `python run_chain.py --name cryotempo -f \
        $CLEV2ER_BASE_DIR/testdata/cs2/l1bfiles/\
            CS_OFFL_SIR_LRM_1B_20200930T235609_20200930T235758_D001.nc`

        b) Run the cryotempo land ice chain on all L1b files in
           $CLEV2ER_BASE_DIR/testdata/cs2/l1bfiles

        `python run_chain.py --name cryotempo --dir $CLEV2ER_BASE_DIR/testdata/cs2/l1bfiles`

        c) Run with multi-processing and shared memory enabled (these can also be set in the
           main config):

        `python run_chain.py --name cryotempo -d $CLEV2ER_BASE_DIR/testdata/cs2/l1bfiles -sm -mp`
        
"""

import argparse
import glob
import importlib
import logging
import multiprocessing as mp
import os
import re
import sys
import time
import traceback
import types
from datetime import datetime
from logging.handlers import QueueHandler
from math import ceil
from multiprocessing import Process, Queue, current_process
from typing import Any, List, Optional, Type

import numpy as np
import toml
from codetiming import Timer
from netCDF4 import Dataset  # pylint: disable=E0611

from clev2er.algorithms.base.base_alg import AlgorithmError
from clev2er.utils.breakpoints.breakpoint_files import write_breakpoint_file
from clev2er.utils.config.load_config_settings import (
    load_algorithm_list,
    load_config_files,
)
from clev2er.utils.logging_funcs import get_logger

# pylint: disable=too-many-locals
# pylint: disable=too-many-branches
# pylint: disable=too-many-statements
# pylint: disable=too-many-arguments
# pylint: disable=too-many-nested-blocks
# pylint: disable=too-many-lines
# pylint: disable=R0801


def exception_hook(
    exc_type: Type[BaseException],
    exc_value: BaseException,
    exc_traceback: Optional[types.TracebackType],
) -> None:
    """Send an uncaught exception and its traceback to the logging system.

    Has the signature expected of a ``sys.excepthook`` replacement. The exception
    is emitted as an ERROR level record via ``logging.error()`` so that it is
    handled by whatever log handlers are configured, rather than only being
    printed on the console where it can easily be missed.

    Args:
        exc_type: class of the uncaught exception
        exc_value: the exception instance
        exc_traceback: traceback of the exception (may be None)
    """
    uncaught = (exc_type, exc_value, exc_traceback)
    logging.error("Uncaught exception", exc_info=uncaught)


# Install exception_hook as the interpreter's handler for uncaught exceptions, so that
# their tracebacks are emitted through logging.error() instead of only on the console.
# Note: this is a module-level side effect, applied as soon as this module is imported or run.
sys.excepthook = exception_hook


def custom_key(line):
    """Sort key giving the integer N of the first [fN] tag found in a line

    Args:
        line (str): text which may contain a tag of the form [fN], where N is
                    an int which may be large

    Returns:
        int: N from the first [fN] tag in the line, or 0 if the line has no tag
    """
    tag = re.search(r"\[f(\d+)\]", line)
    return int(tag.group(1)) if tag else 0


def sort_file_by_number(filename: str) -> None:
    """Rewrite a log file in place with its lines ordered by their [fN] tag

    Lines are ordered by the integer N returned by custom_key(); lines without
    a tag get key 0. The sort is stable, so lines sharing the same key keep
    their original relative order.

    Args:
        filename (str): log file path (read, then overwritten)
    """
    with open(filename, "r", encoding="utf8") as log_in:
        log_lines = log_in.readlines()

    # in-place list.sort() is stable, exactly like sorted()
    log_lines.sort(key=custom_key)

    with open(filename, "w", encoding="utf8") as log_out:
        log_out.writelines(log_lines)


def insert_txtfile1_in_txtfile2_after_line_containing_string(
    file1: str, file2: str, target_string: str
) -> None:
    """Inserts txtfile1 in txtfile2 after line containing target_string

    The whole of file1 is inserted after *every* line of file2 that contains
    target_string, and file2 is rewritten in place. If no line matches, file2 is
    rewritten unchanged.

    Args:
        file1 (str): path of txt file1 (content to insert)
        file2 (str): path of txt file2 (file which is modified)
        target_string (str): string to search for in file2 and insert contents of file1 after
    """
    with open(file1, "r", encoding="utf-8") as fd1:
        content1 = fd1.read()

    with open(file2, "r", encoding="utf-8") as fd2:
        lines = fd2.readlines()

    new_lines = []
    for line in lines:
        new_lines.append(line)
        if target_string in line:
            # Only the final line of a file can lack a newline terminator. Add one so
            # the inserted text starts on its own line instead of being glued to the
            # end of the matching line.
            if not line.endswith("\n"):
                new_lines.append("\n")
            new_lines.append(content1)

    with open(file2, "w", encoding="utf-8") as fd2:
        fd2.writelines(new_lines)


def append_file(file1_path: str, file2_path: str) -> None:
    """Append the whole contents of one text file to the end of another

    file2_path is opened in append mode, so it is created if it does not exist.

    Args:
        file1_path (str): txt file whose contents are appended
        file2_path (str): txt file which is appended to
    """
    with (
        open(file1_path, "r", encoding="utf-8") as source,
        open(file2_path, "a", encoding="utf-8") as destination,
    ):
        text = source.read()
        destination.write(text)


def remove_strings_from_file(filename: str) -> None:
    """Strip every [fN] tag (N being any integer) from a text file, in place

    Only the tag itself is removed; any surrounding whitespace is left as it is.

    Args:
        filename (str): file name (read, then overwritten)
    """
    # matches tags of the form '[fN]'
    tag_pattern = re.compile(r"\[f\d+\]")

    # read the file line by line, dropping the tags from each line
    with open(filename, "r", encoding="utf-8") as tagged:
        cleaned = [tag_pattern.sub("", text_line) for text_line in tagged]

    # overwrite the file with the tag-free lines
    with open(filename, "w", encoding="utf-8") as untagged:
        untagged.writelines(cleaned)


def run_chain_on_single_file(
    l1b_file: str,
    alg_object_list: list[Any],
    config: dict,
    log: logging.Logger,
    log_queue: Optional[Queue],
    rval_queue: Optional[Queue],
    filenum: int,
    breakpoint_alg_name: str = "",
) -> tuple[bool, str, str, dict]:
    """Runs the algorithm chain on a single L1b file.

       This function is run in a separate process if multi-processing is enabled.

    Args:
        l1b_file (str): path of L1b file to process
        alg_object_list (list[Algorithm]): list of Algorithm objects
        config (dict): chain configuration dictionary. config["chain"]["use_multi_processing"]
                       selects between queue based (multi-processing) and direct logging/results
        log (logging.Logger): logging instance to use (only used when multi-processing is
                              disabled)
        log_queue (Queue): Queue for multi-processing logging
        rval_queue (Queue) : Queue for multi-processing results
        filenum (int) : file number being processed
        breakpoint_alg_name (str) : if not '', name of algorithm to break after.

    Returns:
        Tuple(bool,str,str,dict):
        : algorithms success (True) or Failure (False),
        : '' or error string
        : path of breakpoint file or ''
        : shared_dict
        for multi-processing the tuple (success, error string, Timer.timers) is additionally
        queued -> rval_queue for this process
    """

    bp_filename = ""  # break point file path
    shared_dict: dict = {}  # shared dictionary to pass data between algorithms

    # Setup logging either for multi-processing or standard (single process)
    if config["chain"]["use_multi_processing"]:
        # create a logger
        logger = logging.getLogger("mp")
        # add a handler that uses the shared queue
        if log_queue is not None:
            handler = QueueHandler(log_queue)
            # tag every message with [fN] so the log can later be grouped by file number
            handler.setFormatter(logging.Formatter(f"[f{filenum}] %(message)s"))
            logger.addHandler(handler)
        # log all messages, debug and up
        logger.setLevel(logging.DEBUG)
        # get the current process
        process = current_process()
        # report initial message
        logger.debug("[f%d] Child %s starting.", filenum, process.name)
        thislog = logger
    else:
        thislog = log

    thislog.info("_%s", "_" * 79)  # add a divider line in the log

    thislog.info("Processing file %d: %s", filenum, l1b_file)

    try:  # and open the NetCDF file
        with Dataset(l1b_file) as nc:
            # ------------------------------------------------------------------------
            # Run each algorithms .process() function in order
            # ------------------------------------------------------------------------

            shared_dict["l1b_file_name"] = l1b_file

            for alg_obj in alg_object_list:
                alg_obj.set_filenum(filenum)
                alg_obj.set_log(thislog)
                # Run the Algorithm's process() function. Note that for multi-processing
                # the process() function also calls the init() function first

                success, error_str = alg_obj.process(nc, shared_dict)
                if not success:
                    if "SKIP_OK" in error_str:
                        # Remove the SKIP_OK marker from the reported reason. Note that
                        # str.strip("SKIP_OK ") must not be used for this: its argument
                        # is a *set of characters*, so it would also eat any leading or
                        # trailing S, K, I, P, _, O characters of the reason itself.
                        thislog.info(
                            "Processing of L1b file %d : %s SKIPPED because %s",
                            filenum,
                            l1b_file,
                            error_str.replace("SKIP_OK", "").strip(),
                        )
                    else:
                        thislog.error(
                            "Processing of L1b file %d : %s stopped because %s",
                            filenum,
                            l1b_file,
                            error_str,
                        )

                    if config["chain"]["use_multi_processing"]:
                        if rval_queue is not None:
                            rval_queue.put((False, error_str, Timer.timers))
                        # Free up resources by running the Algorithm.finalize() on each
                        # algorithm instance
                        for initialized_alg in alg_object_list:
                            if initialized_alg.initialized:
                                initialized_alg.finalize(stage=5)
                    return (False, error_str, bp_filename, shared_dict)

                # Compare against the last component of the (dotted) algorithm name
                if alg_obj.alg_name.rsplit(".", maxsplit=1)[-1] == breakpoint_alg_name:
                    thislog.debug("breakpoint reached at algorithm %s", alg_obj.alg_name)
                    bp_filename = write_breakpoint_file(
                        config, shared_dict, thislog, breakpoint_alg_name
                    )
                    break

            if config["chain"]["use_multi_processing"]:
                # Free up resources by running the Algorithm.finalize() on each
                # algorithm instance
                for alg_obj in alg_object_list:
                    if alg_obj.initialized:
                        alg_obj.finalize(stage=6)

    except (IOError, ValueError, KeyError, IndexError):
        error_str = f"Error processing {l1b_file}: {traceback.format_exc()}"
        thislog.debug(error_str)
        exc_type, exc_value, exc_tb = sys.exc_info()
        if exc_tb is not None:
            filename = exc_tb.tb_frame.f_code.co_filename
            line_number = exc_tb.tb_lineno
        else:
            filename = "unknown"
            line_number = 0

        thislog.error(
            "Processing error : %s:%s [%s:line %d]",
            exc_type.__name__ if exc_type is not None else "",
            exc_value,
            filename,
            line_number,
        )
        if config["chain"]["use_multi_processing"]:
            if rval_queue is not None:
                rval_queue.put((False, error_str, Timer.timers))  # pass the function return values
                # back to the parent process
                # via a queue
        return (False, error_str, bp_filename, shared_dict)

    if config["chain"]["use_multi_processing"]:
        if rval_queue is not None:
            rval_queue.put((True, "", Timer.timers))
    return (True, "", bp_filename, shared_dict)


def mp_logger_process(queue, config) -> None:
    """Logging loop run in its own process when multi-processing is enabled

    Attaches file handlers to the "mp" logger, writing to the configured error and
    info log paths (and the debug log path if debug_mode is set), each with ".mp"
    appended. Log records are then taken from the queue and handled until a None
    entry is received, which ends the loop.

    Args:
        queue (Queue): object created by multiprocessing.Queue()
        config (dict): main config dictionary for log file paths
    """
    logger = logging.getLogger("mp")

    log_files = config["log_files"]
    debug_mode = log_files.get("debug_mode", False)

    # the logger name is only shown in debug mode
    log_format = (
        "[%(levelname)-2s] : %(asctime)s : %(name)-12s :  %(message)s"
        if debug_mode
        else "[%(levelname)-2s] : %(asctime)s :  %(message)s"
    )
    log_formatter = logging.Formatter(log_format, datefmt="%d/%m/%Y %H:%M:%S")

    # (config key of log path, minimum level written to that log)
    #   error log : ERROR only
    #   info log  : INFO and above (not DEBUG)
    #   debug log : everything, debug mode only
    handler_specs = [("error_name", logging.ERROR), ("info_name", logging.INFO)]
    if debug_mode:
        handler_specs.append(("debug_name", logging.DEBUG))

    for path_key, min_level in handler_specs:
        file_handler = logging.FileHandler(log_files[path_key] + ".mp", mode="w")
        file_handler.setFormatter(log_formatter)
        file_handler.setLevel(min_level)
        logger.addHandler(file_handler)

    # block on the queue for each record; a None entry is the shutdown signal
    while (message := queue.get()) is not None:
        logger.handle(message)


def run_chain(
    l1b_file_list: list[str],
    config: dict,
    algorithm_list: list[str],
    log: logging.Logger,
    breakpoint_alg_name: str = "",
) -> tuple[bool, int, int, int, str, dict]:
    """Run the algorithm chain in algorithm_list on each L1b file in l1b_file_list
       using the configuration settings in config

    Processing is either sequential, or (if config["chain"]["use_multi_processing"] is set)
    one child process per file, run in sets of at most
    config["chain"]["max_processes_for_multiprocessing"] processes.
    config["chain"]["stop_on_error"] is only acted on in sequential mode.

    Args:
        l1b_file_list (list[str]): list of l1b files paths to process
        config (dict): configuration dictionary. This is the named chain config and the
                                                 main config merged
        algorithm_list (list[str]): list of algorithm names
        log (logging.Logger): log instance to use
        breakpoint_alg_name (str): name of algorithm to set break point after.
                                   Default='' (no breakpoint set here)

    Returns:
        tuple(bool,int,int, int,str,dict) :
        :chain success or failure,
        :number_of_errors,
        :number of files processed,
        :number of files skipped(for valid reasons),
        :breakpoint filename
        :shared_dict (of last file processed in sequential mode only, else {})
        If an algorithm can not be imported or initialized, (False, 1, 0, 0, '', {}) is
        returned without processing any files.
    """

    n_files = len(l1b_file_list)
    breakpoint_filename = ""
    shared_dict: dict = {}

    # -------------------------------------------------------------------------------------------
    # Load the dynamic algorithm modules from clev2er/algorithms/<algorithm_name>.py
    #   - runs each algorithm object's __init__() function
    # -------------------------------------------------------------------------------------------
    alg_object_list = []
    shared_mem_alg_object_list: List[Any] = []
    # duplicate list used to call initialization
    # of shared memory resources where used.

    log.info("Dynamically importing and initializing algorithms from list...")

    for alg in algorithm_list:
        # --------------------------------------------------------------------
        # Dynamically import each Algorithm from the list
        # --------------------------------------------------------------------
        try:
            module = importlib.import_module(
                f"clev2er.algorithms.{config['chain']['chain_name']}.{alg}"
            )
        except ImportError as exc:
            log.error("Could not import algorithm %s, %s", alg, exc)
            return (False, 1, 0, 0, breakpoint_filename, shared_dict)

        # --------------------------------------------------------------------
        # Create an instance of each Algorithm,
        #   - runs its __init__(config) function
        # --------------------------------------------------------------------

        # Load/Initialize algorithm
        try:
            alg_obj = module.Algorithm(config, log)
        except (FileNotFoundError, IOError, KeyError, ValueError, OSError, AlgorithmError) as exc:
            exc_type, exc_value, exc_tb = sys.exc_info()
            if exc_tb is not None:
                filename = exc_tb.tb_frame.f_code.co_filename
                line_number = exc_tb.tb_lineno
            else:
                filename = "unknown"
                line_number = 0
            log.error(
                "Could not initialize algorithm %s due to %s:%s at [%s:line %d]",
                alg,
                exc_type.__name__ if exc_type is not None else "",
                exc_value,
                filename,
                line_number,
            )
            log.debug("%s", exc)
            return (False, 1, 0, 0, breakpoint_filename, shared_dict)

        alg_object_list.append(alg_obj)

        # --------------------------------------------------------------------
        # Create a second instance of each Algorithm for multi-processing
        # shared memory buffer allocations,
        #   - runs its __init__(config) function
        # Note that the .process() function is never run for this instance
        # We merge  {"_init_shared_mem": True} to the config so that the
        # Algorithm knows to run any shared memory initialization
        # --------------------------------------------------------------------

        if config["chain"]["use_multi_processing"] and config["chain"]["use_shared_memory"]:
            # Load/Initialize algorithm
            # The same exception types are caught as for the primary instance above, so that
            # an AlgorithmError/OSError here also results in the already allocated shared
            # memory being released, instead of the exception escaping without clean up.
            try:
                alg_obj_shm = module.Algorithm(config | {"_init_shared_mem": True}, log)
            except (FileNotFoundError, IOError, KeyError, ValueError, OSError, AlgorithmError):
                log.error(
                    "Could not initialize algorithm for shared_memory %s, %s",
                    alg,
                    traceback.format_exc(),
                )
                # If there is a failure we must clean up any shared memory already allocated
                for alg_obj_shm in shared_mem_alg_object_list:
                    if alg_obj_shm.initialized:
                        alg_obj_shm.finalize(stage=4)

                return (False, 1, 0, 0, breakpoint_filename, shared_dict)

            shared_mem_alg_object_list.append(alg_obj_shm)

        # If a breakpoint after this alg is set we don't need to initialize any more algorithms
        if alg == breakpoint_alg_name:
            log.debug("breakpoint reached in import for %s", alg)
            break
    # -------------------------------------------------------------------------------------------
    #  Run algorithm chain's Algorithm.process() on each L1b file in l1b_file_list
    #    - a different method required for multi-processing or standard sequential processing
    #    - Note that choice of MP method is due to logging reliability constraints, which
    #      caused problems with simpler more modern pool.starmap methods
    # -------------------------------------------------------------------------------------------
    num_errors = 0
    num_files_processed = 0
    num_skipped = 0

    # --------------------------------------------------------------------------------------------
    # Parallel Processing (optional)
    # --------------------------------------------------------------------------------------------
    if config["chain"]["use_multi_processing"]:  # pylint: disable=R1702
        # With multi-processing we need to redirect logging to a stream

        # create a shared logging queue for multiple processes to use
        log_queue: Queue = Queue()

        # create a logger
        new_logger = logging.getLogger("mp")
        # add a handler that uses the shared queue
        new_logger.addHandler(QueueHandler(log_queue))
        # use the same log level as the main log
        new_logger.setLevel(log.level)
        # start the logger process
        logger_p = Process(target=mp_logger_process, args=(log_queue, config))
        logger_p.start()

        # Divide up the input files in to chunks equal to maximum number of processes
        # allowed == config["chain"]["max_processes_for_multiprocessing"]

        log.info(
            "Using multi-processing with max %d processes",
            config["chain"]["max_processes_for_multiprocessing"],
        )

        num_chunks = ceil(n_files / config["chain"]["max_processes_for_multiprocessing"])

        # np.array_split() raises ValueError when asked for 0 sections, so an empty
        # input file list is handled as "no chunks to process"
        file_indices_chunks = (
            np.array_split(list(range(n_files)), num_chunks) if num_chunks > 0 else []
        )

        for chunk_num, file_indices in enumerate(file_indices_chunks):
            chunked_l1b_file_list = np.array(l1b_file_list)[file_indices]
            log.debug(
                "mp chunk_num %d: chunked_l1b_file_list=%s", chunk_num, chunked_l1b_file_list
            )

            num_procs = len(chunked_l1b_file_list)

            log.info(
                "Running process set %d of %d (containing %d processes)",
                chunk_num + 1,
                num_chunks,
                num_procs,
            )

            # Create separate queue for each new process to handle function return values
            rval_queues: List[Queue] = [Queue() for _ in range(num_procs)]

            # configure child processes
            processes = [
                Process(
                    target=run_chain_on_single_file,
                    args=(
                        chunked_l1b_file_list[i],
                        alg_object_list,
                        config,
                        None,  # log: children log via log_queue instead
                        log_queue,
                        rval_queues[i],
                        file_indices[i],
                        breakpoint_alg_name,
                    ),
                )
                for i in range(num_procs)
            ]
            # start child processes
            for process in processes:
                process.start()

            # wait for child processes to finish
            for i, process in enumerate(processes):
                process.join()
                # retrieve return values of each process function from queue
                # rval=(bool, str, Timer.timers)
                while not rval_queues[i].empty():
                    rval = rval_queues[i].get()
                    if not rval[0] and "SKIP_OK" not in rval[1]:
                        num_errors += 1
                    if "SKIP_OK" in rval[1]:
                        num_skipped += 1
                    num_files_processed += 1
                    # rval[2] returns the Timer.timers dict for algorithms process() function
                    # ie a dict containing timers['alg_name']= the number of seconds elapsed
                    # These are accumulated in to this (parent) process's Timer.timers
                    for key, value in rval[2].items():
                        Timer.timers.add(key, value)

        # shutdown the queue correctly
        log_queue.put(None)
        # wait for the logger process to consume everything queued before the shutdown
        # entry, so that the .mp log files are complete when this function returns
        logger_p.join()

        log.info("MP processing completed with outputs logged:")
    # --------------------------------------------------------------------------------------------
    # Sequential Processing
    # --------------------------------------------------------------------------------------------
    else:  # Normal sequential processing (when multi-processing is disabled)
        try:
            for fnum, l1b_file in enumerate(l1b_file_list):
                log.info("\n%sProcessing file %d of %d%s", "-" * 20, fnum + 1, n_files, "-" * 20)
                success, error_str, breakpoint_filename, shared_dict = run_chain_on_single_file(
                    l1b_file,
                    alg_object_list,
                    config,
                    log,
                    None,
                    None,
                    fnum,
                    breakpoint_alg_name,
                )
                num_files_processed += 1
                if not success and "SKIP_OK" in error_str:
                    log.debug("Skipping file")
                    num_skipped += 1
                    continue
                if not success:
                    num_errors += 1

                    if config["chain"]["stop_on_error"]:
                        log.error(
                            "Chain stopped because of error processing L1b file %s",
                            l1b_file,
                        )
                        break

                    log.error(
                        "Error processing L1b file %s, skipping file",
                        l1b_file,
                    )
                    continue
        except KeyboardInterrupt as exc:
            # The interrupt stops the file loop, but the finalize stage below still runs.
            # A %s placeholder is required for the argument: passing it without one makes
            # the logging module report a formatting error instead of this message.
            log.error("KeyboardInterrupt detected %s", exc)

    # -----------------------------------------------------------------------------
    # Run each algorithms .finalize() function in order
    # -----------------------------------------------------------------------------

    log.debug("_" * 79)  # add a divider line in the log

    for alg_obj_shm in shared_mem_alg_object_list:
        if alg_obj_shm.initialized:
            alg_obj_shm.finalize(stage=2)

    for alg_obj in alg_object_list:
        if alg_obj.initialized:
            alg_obj.finalize(stage=3)

    # Elapsed time for each algorithm.
    # Note if multi-processing, process times are added for each algorithm
    # (so total time processing will be less)
    #  - ie for processes p1 and p2,
    #  -    alg1.time =(p1.alg1.time +p2.alg1.time +,..)
    #  -    alg2.time =(p1.alg2.time +p2.alg2.time +,..)

    log.info("\n%sAlgorithm Cumulative Processing Time%s", "-" * 20, "-" * 20)

    for algname, cumulative_time in Timer.timers.items():
        log.info("%s %.3f s", algname, cumulative_time)

    return (
        (num_errors == 0),
        num_errors,
        num_files_processed,
        num_skipped,
        breakpoint_filename,
        shared_dict,
    )


def extract_version(toml_file_path):
    """Read the poetry version string from a pyproject.toml file

    The value is looked up at [tool.poetry] version.

    Args:
        toml_file_path (str): path of pyproject.toml

    Returns:
        str: version string, such as "0.1.0", or "Version not found" if the file has
             no tool.poetry.version entry
    """
    with open(toml_file_path, "r", encoding="utf-8") as toml_fd:
        parsed = toml.load(toml_fd)

    # step down through the tables, treating any missing table as empty
    poetry_table = parsed.get("tool", {}).get("poetry", {})
    return poetry_table.get("version", "Version not found")


def valid_datetime(date_string: str, is_end_date: bool = False) -> datetime:
    """Validate and parse a date string with optional time.

    Args:
        date_string (str): The date string to validate. Expected formats are
                           `YYYY-MM-DD` or `YYYY-MM-DD HH:MM:SS`.
        is_end_date (bool): If True and the date string is `YYYY-MM-DD`, return the end of the day
                            (23:59:59.999999). Has no effect when a time is included.

    Returns:
        datetime: A (timezone naive) `datetime` object parsed from the input string.

    Raises:
        argparse.ArgumentTypeError: If the date string does not match the valid formats, or
            matches a format but is not a real date/time (for example month 13).
    """
    invalid_msg = (
        f"Not a valid date: '{date_string}'. Expected format"
        " is 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'."
    )

    # fullmatch() is used so that nothing (including a trailing newline, which a
    # '$' anchor would allow) may follow the date
    try:
        if re.fullmatch(r"\d{4}-\d{2}-\d{2}", date_string):
            # Date only format
            dt = datetime.strptime(date_string, "%Y-%m-%d")
            if is_end_date:
                dt = dt.replace(hour=23, minute=59, second=59, microsecond=999999)
            return dt
        if re.fullmatch(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", date_string):
            # Date and time format
            return datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")
    except ValueError as exc:
        # correct layout, but strptime() rejected the values (ie 2020-02-30)
        raise argparse.ArgumentTypeError(invalid_msg) from exc

    raise argparse.ArgumentTypeError(invalid_msg)


def comma_separated_intvals(value: str) -> list[int]:
    """Parse a string of comma separated integers in to a list of int

    Args:
        value (str): string to process, for example "2019,2020"

    Raises:
        argparse.ArgumentTypeError: if any of the comma separated items is not an integer

    Returns:
        list[int]: list of integers, in the order given
    """
    try:
        return list(map(int, value.split(",")))
    except ValueError as exc:
        raise argparse.ArgumentTypeError(
            f"Invalid year format: {value}. Must be comma-separated integers."
        ) from exc


def filter_files_by_instrument_mode_substrings(
    file_list: List[str], substrings: List[str]
) -> List[str]:
    """
    Select the file paths whose instrument mode field is one of the given substrings.

    A path is kept if it contains ``CR<any char>_IR_1B_``, followed later by one of the
    substrings enclosed between underscores (``_<substring>_``).

    :param file_list: List of file paths.
    :param substrings: List of substrings to match (e.g., ['SIC', 'SAC']). Each is matched
        literally (regex special characters are escaped).
    :return: List of matching file paths, in their original order.
    """
    # alternation of the literal substrings, ie SIC|SAC
    alternatives = "|".join(map(re.escape, substrings))
    mode_matcher = re.compile(rf"CR._IR_1B_.*_({alternatives})_")

    # search() returns None (falsy) for paths which do not match
    return list(filter(mode_matcher.search, file_list))


def filter_files_by_processing_mode_substrings(
    file_list: List[str], substrings: List[str]
) -> List[str]:
    """
    Select the file paths whose processing mode field is one of the given substrings.

    A path is kept if it contains ``CR<any char>_IR_1B_<substring>_``, ie the substring
    must directly follow the ``_1B_`` field.

    :param file_list: List of file paths.
    :param substrings: List of substrings to match (e.g., ['HR', 'FF']). Each is matched
        literally (regex special characters are escaped).
    :return: List of matching file paths, in their original order.
    """
    # build an alternation of the literal substrings, ie HR|FF
    escaped = [re.escape(substring) for substring in substrings]
    substrings_pattern = "|".join(escaped)
    mode_matcher = re.compile(rf"CR._IR_1B_({substrings_pattern})_")

    selected: List[str] = []
    for file_path in file_list:
        if mode_matcher.search(file_path) is None:
            continue
        selected.append(file_path)
    return selected


def set_nested_key(config: dict[str, Any], keys: List[str], value: Any) -> None:
    """
    Assign a value at a key path inside a nested dictionary (modified in place).

    Any intermediate dictionary missing along the path is created.

    Args:
        config (Dict[str, Any]): The dictionary to update.
        keys (List[str]): A list of keys representing the path to the nested value.
            The last key is the one assigned to; the preceding keys select the
            nested dictionaries.
        value (Any): The value to set at the specified nested location.

    Returns:
        None
    """
    parent_keys, final_key = keys[:-1], keys[-1]

    # walk down to the dictionary which holds the final key
    node = config
    for parent_key in parent_keys:
        node = node.setdefault(parent_key, {})

    node[final_key] = value


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command line argument parser for the run_chain tool.

    Returns:
        argparse.ArgumentParser: parser with every run_chain option registered
    """
    # initiate the command line parser
    parser = argparse.ArgumentParser()

    # add each argument

    parser.add_argument(
        "--alglist",
        "-a",
        help=(
            "[Optional] path of algorithm list YML file,"
            "default is an empty str which will result in a search for highest version list "
            "files in ${CLEV2ER_BASE_DIR}/config/algorithm_lists/<chain_name>_<B><VVV>.[yml,xml] "
            "where <B> is the uppercase baseline character A..Z, and <VVV> is the zero padded "
            "version number, ie 001"
        ),
        default="",
    )

    parser.add_argument(
        "--breakpoint_after",
        "-bp",
        help=("[Optional, str] algorithm_name : set a breakpoint after the named algorithm "),
        type=str,
    )

    parser.add_argument(
        "--conf",
        "-c",
        help=(
            "[Optional] alternative path of chain controller configuration file (XML format),"
            "default=$CLEV2ER_BASE_DIR/config/chain_configs/<chain_name>/<chain_name>_config.xml"
        ),
        default="",
    )

    parser.add_argument(
        "--conf_opts",
        "-co",
        help=(
            "[Optional] Comma separated list of config options to add/modify the  "
            "configuration dictionary passed to algorithms and finder classes. "
            "Each option can include a value. The value is appended to the option key after a : "
            "Use key:true, or key:false, or key:value. If no value is included with a single level "
            "key, it indicates a boolean true. "
            "For multi-level keys, use another colon, ie key1:key2:value. "
            "Example: -co sin_only:true  or -co sin_only are both the same to only select SIN L1b "
            "files. "
            "These are chain specific and may have different meanings for other chains. "
            "Note that these options override any identical key:values in chain configuration files"
        ),
        type=str,
    )

    parser.add_argument(
        "--continue_on_error",
        "-ce",
        help=(
            "[Optional] continue processing next L1b file on error, logging the errors. "
            "Default is set in main config file"
        ),
        action="store_const",
        const=1,
    )

    parser.add_argument(
        "--debug",
        "-de",
        help=(
            "[Optional] debug mode. log.DEBUG messages are output to the debug log file, "
            "configured in the main config file. By default log.DEBUG messages are not output."
        ),
        action="store_const",
        const=1,
    )

    parser.add_argument(
        "--dir",
        "-d",
        help=("[Optional] path of a directory containing input L1b files"),
    )

    parser.add_argument(
        "--end_date",
        "-e",
        type=lambda s: valid_datetime(s, is_end_date=True),
        required=False,
        help='Optional End date for L1b selection. Format is: YYYY-MM-DD or "YYYY-MM-DD HH:MM:SS"',
    )

    parser.add_argument(
        "--file",
        "-f",
        help=("[Optional] path of a single input L1b file"),
    )

    parser.add_argument(
        "--imode",
        "-im",
        choices=["SIC", "SAC", "SIO"],
        required=False,
        nargs="+",
        help=(
            "Optional L1b file selection by one or more CRISTAL instrument modes: SIC, SAC, SIO,"
            "where SIC=SARin Closed Burst, SIO=SARin Open-Burst, SAC=SAR Closed-Burst. "
            "To select multiple modes separate each by a space: example --imode SIC SAC"
        ),
    )

    parser.add_argument(
        "--list",
        "-ls",
        help=("[Optional] list algorithms enabled in chain, and exit"),
        required=False,
        action="store_const",
        const=1,
    )

    parser.add_argument(
        "--logstring",
        "-lgs",
        help=(
            "[Optional] append this string to log file names for this run, as "
            "<logfilename>_<this_string>.log"
        ),
        type=str,
    )

    parser.add_argument(
        "--name",
        "-n",
        help=(
            "name (str) : chain name. Should contain no spaces or special chars other than _. "
            "Algorithm modules for this chain are located in "
            "${CLEV2ER_BASE_DIR}/src/algorithms/<name>.\n"
            "Actual algorithms used for the chain and their order are chosen in "
            "a separate algorithm list (see --alglist)"
        ),
        required=False,
    )

    parser.add_argument(
        "--nprocs",
        "-np",
        help=(
            "[Optional] maximum number of cores to split multi-processing on. "
            "Overrides setting in main config file"
        ),
        type=int,
    )

    parser.add_argument(
        "--max_files",
        "-mf",
        help=("[Optional] limit number of input files to this number"),
        type=int,
    )

    parser.add_argument(
        "--mconf",
        "-mc",
        help=(
            "[Optional] path of main controller configuration file (XML format),"
            "default=$CLEV2ER_BASE_DIR/config/main_config.xml"
        ),
        default="",  # empty string results in use of $CLEV2ER_BASE_DIR/config/main_config.xml
    )

    parser.add_argument(
        "--month",
        "-m",
        help=(
            "[Optional] comma separated month numbers (1,12) to use to select L1b files. "
            "Example --month 1,2 or --month 12"
        ),
        type=comma_separated_intvals,
    )

    parser.add_argument(
        "--multiprocessing",
        "-mp",
        help=(
            "[Optional] use multi-processing, overrides main config file use_multi_processing "
            "setting to true"
        ),
        action="store_const",
        const=1,
    )

    parser.add_argument(
        "--pmode",
        "-pm",
        choices=["HR", "FF", "LRM", "PLR", "LR", "LOS", "LMC", "FBR"],
        required=False,
        nargs="+",
        help=(
            "Optional L1b file selection by one or more CRISTAL delay doppler processing modes: "
            "HR,FF,LRM,PLR,LR,LOS,LMC,FBR,"
            "where HR=High Rate, FF=Fully Focussed, LRM=Low Rate Mode, PLR=Pseudo LRM,.. "
            "To select multiple modes separate each by a space: example --pmode HR FF"
        ),
    )

    parser.add_argument(
        "--quiet",
        "-q",
        help=("[Optional] do not output log messages to stdout"),
        action="store_const",
        const=1,
    )

    parser.add_argument(
        "--recursive",
        "-r",
        help=("[Optional] recursively find files in a directory and sub-directories"),
        action="store_true",
    )

    parser.add_argument(
        "--sequentialprocessing",
        "-sp",
        help=(
            "[Optional] use sequential (standard) processing, overrides main config file "
            "use_multi_processing setting to false"
        ),
        action="store_const",
        const=1,
    )

    parser.add_argument(
        "--sharedmem",
        "-sm",
        help=("[Optional] use shared memory when multi-processing is enabled"),
        action="store_const",
        const=1,
    )

    parser.add_argument(
        "--start_date",
        "-s",
        type=valid_datetime,
        required=False,
        help=(
            "Optional Start date for L1b selection. "
            'Format is: YYYY-MM-DD or "YYYY-MM-DD HH:MM:SS"'
        ),
    )

    parser.add_argument(
        "--stop_on_error",
        "-st",
        help=("[Optional] stop chain on first error. Default is set in main config file"),
        action="store_const",
        const=1,
    )

    parser.add_argument(
        "--version",
        "-v",
        help=("[Optional] print release version and exit"),
        action="store_const",
        const=1,
    )

    parser.add_argument(
        "--year",
        "-y",
        help=(
            "[Optional] comma separated year numbers (YYYY,YYYY) to use to select L1b files. "
            "Example --year 2021  or --year 2021,2022. If --month is not used then every month"
            " (1,12) is selected. All files found within the month and year are selected."
        ),
        type=comma_separated_intvals,
    )

    return parser


def _add_log_name_suffix(log_path: str, suffix: str) -> str:
    """Return log_path with _<suffix> inserted before its trailing .log extension.

    Only the final extension is touched, so a '.log' occurring inside a directory
    name of the path is left alone. Paths not ending in '.log' are returned unchanged.

    Args:
        log_path (str): path of a log file
        suffix (str): text to insert (an underscore is added in front of it)

    Returns:
        str: the modified path
    """
    extension = ".log"
    if not log_path.endswith(extension):
        return log_path
    return f"{log_path[: -len(extension)]}_{suffix}{extension}"


def _find_l1b_files_with_finders(
    finder_list: List[str],
    config: dict[str, Any],
    log: logging.Logger,
    flat_search: bool,
    configure_finder: Any,
) -> List[str]:
    """Run each of the chain's FileFinder modules and collect the files they return.

    Each module is imported from clev2er.algorithms.<chain_name>.<module name>, its
    FileFinder class is instantiated with the log and config, passed to
    configure_finder() to apply the date selection, and then find_files() is called.
    The process exits with status 1 (after logging the error) if a module can not be
    imported or a finder raises KeyError, ValueError or FileNotFoundError.

    Args:
        finder_list (List[str]): finder module names for this chain
        config (dict): chain configuration dictionary
        log (logging.Logger): logger to use
        flat_search (bool): passed to FileFinder.find_files(flat_search=...)
        configure_finder (callable): called with the FileFinder instance before the search

    Returns:
        List[str]: all files found, in finder order
    """
    found_files: List[str] = []

    for file_selector_module in finder_list:
        # Import module
        try:
            module = importlib.import_module(
                f"clev2er.algorithms.{config['chain']['chain_name']}.{file_selector_module}"
            )
        except ImportError as exc:
            log.error("Could not import module %s, %s", file_selector_module, exc)
            sys.exit(1)

        try:
            finder = module.FileFinder(log=log, config=config)
            configure_finder(finder)
            files = finder.find_files(flat_search=flat_search)
            if len(files) > 0:
                found_files.extend(files)
        except (KeyError, ValueError, FileNotFoundError) as exc:
            log.error("file finder error: %s", exc)
            sys.exit(1)

    return found_files


def main() -> None:
    """Command line entry point: configure and run a CLEV2ER algorithm chain.

    Steps performed:
        1. parse the command line arguments
        2. load the main and chain configuration files, then apply command line overrides
        3. set up the info/error/debug log files
        4. load the chain's algorithm list (and optional file finder modules)
        5. select the input L1b files (--file, --dir, --year/--month or --start/--end_date),
           optionally filtered by instrument mode, processing mode and --max_files
        6. run the chain on the selected files and log a summary
        7. tidy up the log files (merge the .mp logs when multi-processing was used)

    The function always terminates the process through sys.exit(): status 0 when the
    chain ran without errors (or after --version / --list), otherwise a non-zero status
    or an error message.
    """

    this_version = "0.1.3"

    # ----------------------------------------------------------------------
    # Process Command Line Arguments for tool
    # ----------------------------------------------------------------------

    # read arguments from the command line
    args = _build_arg_parser().parse_args()

    if args.version:
        print(f"run_chain executable release version : {this_version}")
        config_version = extract_version(
            os.path.join(f'{os.environ["CLEV2ER_BASE_DIR"]}', "pyproject.toml")
        )
        print(f"config version : {config_version}")
        if config_version != this_version:
            sys.exit(
                f"WARNING: config version {config_version} does not equal executable version "
                f"{this_version}"
            )
        sys.exit(0)

    if not args.name:
        sys.exit("ERROR: missing command line argument --name <chain_name>")

    # -------------------------------------------------------------------------
    # Load the main and chain configuration files
    #   - main config path can be set by --mconf <filepath>
    #   - chain config path can be set by --conf <filepath>
    #   - an empty path selects the default location
    # -------------------------------------------------------------------------

    try:
        config, config_file, chain_config_file = load_config_files(
            args.name,
            main_config_file=args.mconf,
            chain_config_file=args.conf,
        )
    except (KeyError, OSError, ValueError):
        if args.debug:
            sys.exit(f"Loading config file error: {traceback.format_exc()}")
        else:
            sys.exit("Error loading chain configuration files. Use --debug for more info")

    # The log_files section is written to just below, so test for it before first use
    if "log_files" not in config:
        sys.exit(f"log_files section missing from chain configuration file {chain_config_file}")

    # -------------------------------------------------------------------------
    # Modify  config settings from command line args and store modifications
    # to report later
    # -------------------------------------------------------------------------

    modified_args = []

    if args.breakpoint_after:
        modified_args.append(f"breakpoint_after={args.breakpoint_after}")
    if args.quiet:
        modified_args.append("quiet=True")
    if args.debug:
        modified_args.append("debug=True")
        config["log_files"]["debug_mode"] = True
    else:
        config["log_files"]["debug_mode"] = False
    if args.max_files:
        modified_args.append(f"max_files={args.max_files}")
    if args.alglist:
        modified_args.append(f"alglist={args.alglist}")
    if args.conf:
        modified_args.append(f"conf={args.conf}")
    if args.logstring:
        modified_args.append(f"logstring={args.logstring}")
    if args.multiprocessing:
        config["chain"]["use_multi_processing"] = True
        modified_args.append("use_multi_processing=True")
    if args.sequentialprocessing:
        config["chain"]["use_multi_processing"] = False
        modified_args.append("use_multi_processing=False")
    if args.sharedmem:
        if config["chain"]["use_multi_processing"]:
            config["chain"]["use_shared_memory"] = True
            modified_args.append("use_shared_memory=True")
        else:
            sys.exit(
                "ERROR: --sharedmem option must be used  with multi-processing enabled"
                "\nEither through the --multiprocessing command line option, or"
                "\nthrough the chain:use_multi_processing setting in the main config file"
            )
    if args.nprocs:
        config["chain"]["max_processes_for_multiprocessing"] = args.nprocs
        modified_args.append(f"max_processes_for_multiprocessing={args.nprocs}")

    config["chain"]["chain_name"] = args.name

    if args.stop_on_error:
        config["chain"]["stop_on_error"] = True
        modified_args.append("stop_on_error=True")
    if args.continue_on_error:
        config["chain"]["stop_on_error"] = False
        modified_args.append("stop_on_error=False")

    # Process command line arg 'conf_opts' to modify config dict
    # these are comma separated, with : to separate levels
    #
    if args.conf_opts:
        keyvals = args.conf_opts.split(",")
        for keyval in keyvals:
            if ":" not in keyval:  # single level, without value == True
                config[keyval] = True
            else:
                mkeyvals = keyval.split(":")
                val = mkeyvals[-1]
                # convert the value text to bool, int or float where possible,
                # otherwise it stays a str
                if val == "false":
                    val = False
                elif val == "true":
                    val = True
                elif val.isdigit():
                    val = int(val)
                else:
                    try:
                        val = float(val)
                    except ValueError:
                        pass

                set_nested_key(config, mkeyvals[:-1], val)

    # -------------------------------------------------------------------------
    # Check we have enough input command line args
    # -------------------------------------------------------------------------

    if args.start_date and not args.end_date:
        sys.exit(
            "Command line error: Missing --end_date. Must include --end_date "
            "when using  --start_date"
        )

    if args.start_date and args.year:
        sys.exit("Command line error: Do not include both --year and --start/end_date")

    if (  # pylint: disable=too-many-boolean-expressions
        not args.list
        and not args.file
        and not args.dir
        and not args.year
        and not args.month
        and not (args.start_date and args.end_date)
    ):
        sys.exit(
            f"usage error: No L1b inputs specified for the {args.name} chain. Must have either "
            "\n--file (-f) <single L1b file as input>,"
            "\n--dir (-d) <input all L1b files in this directory>, or "
            "\n--year (-y) <YYYY> and --month (-m) <M> : search for files for "
            "specified year and month."
            "\n--start_date (-s)  and --end_date (-e) : search for files for "
            "specified start and end date or time."
        )

    # -------------------------------------------------------------------------
    # Setup logging
    #   - default log level is INFO unless --debug command line argument is set
    #   - default log files paths for error, info, and debug are defined in the
    #     main config file
    # -------------------------------------------------------------------------

    # Test that all required log file settings are in the config dict

    for required_key in (
        "default_dir",
        "error_name",
        "info_name",
        "debug_name",
        "logname_str",
        "append_date_selection",
    ):
        if required_key not in config["log_files"]:
            sys.exit(
                f"log_files:{required_key} section missing from chain config file "
                f"{chain_config_file}"
            )

    if not os.path.isdir(config["log_files"]["default_dir"]):
        sys.exit(
            f'The selected log file directory {config["log_files"]["default_dir"]} '
            "does not exist. Please create it first."
        )

    # Form log file names
    log_file_error_name = os.path.join(
        f'{config["log_files"]["default_dir"]}', f'{config["log_files"]["error_name"]}.log'
    )
    log_file_info_name = os.path.join(
        f'{config["log_files"]["default_dir"]}', f'{config["log_files"]["info_name"]}.log'
    )
    log_file_debug_name = os.path.join(
        f'{config["log_files"]["default_dir"]}', f'{config["log_files"]["debug_name"]}.log'
    )

    # Collect the optional suffixes to insert (in this order) before .log
    log_suffixes: List[str] = []

    # String from config log_files:logname_str, replaced by --logstring when given
    log_string = config["log_files"]["logname_str"]
    if args.logstring:
        log_string = args.logstring
    if log_string.strip():  # only used when it has non-space chars
        log_suffixes.append(log_string)

    # Add _YYYY or _MMYYYY if config["log_files"]["append_date_selection"] is set
    if config["log_files"]["append_date_selection"] and args.year:
        year_str = "_".join(str(num) for num in args.year)
        if args.month:
            month_str = "_".join(f"{num:02d}" for num in args.month)
            log_suffixes.append(f"{month_str}{year_str}")
        else:
            log_suffixes.append(year_str)

    if config["log_files"]["append_process_id"]:
        log_suffixes.append(f"{os.getpid()}")

    # Format the timestamp for the filename (e.g., 20230325T153045)
    formatted_time = datetime.now().strftime("%Y%m%dT%H%M%S")
    if config["log_files"]["append_start_time"]:
        log_suffixes.append(formatted_time)

    for log_suffix in log_suffixes:
        log_file_error_name = _add_log_name_suffix(log_file_error_name, log_suffix)
        log_file_info_name = _add_log_name_suffix(log_file_info_name, log_suffix)
        log_file_debug_name = _add_log_name_suffix(log_file_debug_name, log_suffix)

    # From here on the config holds the full log file paths
    config["log_files"]["error_name"] = log_file_error_name
    config["log_files"]["info_name"] = log_file_info_name
    config["log_files"]["debug_name"] = log_file_debug_name

    log = get_logger(
        default_log_level=logging.DEBUG if args.debug else logging.INFO,
        log_file_error=log_file_error_name,
        log_file_info=log_file_info_name,
        log_file_debug=log_file_debug_name,
        log_name="run_chain",
        silent=args.quiet,
        color_errors_red=config["chain"]["error_logs_red"],
    )
    if args.quiet:
        print(f"info log: {log_file_info_name}")
        print(f"error log: {log_file_error_name}")
        if args.debug:
            print(f"debug log: {log_file_debug_name}")

    log.info("error log: %s", log_file_error_name)
    log.info("info log: %s", log_file_info_name)
    if args.debug:
        log.info("debug log: %s", log_file_debug_name)

    log.info("Chain name: %s ", args.name)

    # -------------------------------------------------------------------------------------------
    # Read the list of algorithms to use for this chain
    # -------------------------------------------------------------------------------------------

    log.info("chain config used: %s", chain_config_file)

    try:
        (
            algorithm_list,
            finder_list,
            alg_list_file,
            breakpoint_alg_name,
        ) = load_algorithm_list(
            args.name,
            alg_list_file=args.alglist,
            log=log,
        )
    except (KeyError, OSError, ValueError):
        log.debug("Loading algorithm list file failed : %s", traceback.format_exc())
        log.error("Loading algorithm list file (xml or yml) failed. Use --debug for more info")
        sys.exit(1)

    log.debug("Number of finder algorithms found %d", len(finder_list))
    for finder_name in finder_list:
        log.debug("Finder algorithm %s", finder_name)

    if args.list:
        blue = "\033[94m"
        endc = "\033[0m"  # Reset to default color
        print("-" * len(f"Algorithms for chain: {args.name}"))
        print(f"{blue}Algorithms for chain: {args.name}{endc}")
        print("-" * len(f"Algorithms for chain: {args.name}"))
        for algnum, alg in enumerate(algorithm_list):
            print(f"{algnum+1}: {alg}")
        # listing the algorithms is a successful outcome, so exit with status 0
        sys.exit(0)

    if args.breakpoint_after:
        breakpoint_alg_name = args.breakpoint_after

    if breakpoint_alg_name:
        log.info("breakpoint set after algorithm %s", breakpoint_alg_name)

    if config["chain"]["use_multi_processing"]:
        # change the default method of multi-processing for Linux from
        # fork to spawn
        mp.set_start_method("spawn")

    # -------------------------------------------------------------------------------------------
    #  Select input L1b files
    #   - single file : args.file
    #   - multiple files : args.dir
    #   - multiple files with recursive search : args.recursive and args.dir
    #   - year and/or month selection
    #   - start and end date selection
    # -------------------------------------------------------------------------------------------

    def _select_date_range(finder: Any) -> None:
        """Configure a FileFinder with the --start_date/--end_date selection."""
        finder.start_date = args.start_date
        finder.end_date = args.end_date

    def _select_year_month(finder: Any) -> None:
        """Configure a FileFinder with the --year (and optional --month) selection."""
        if args.month and args.year:
            finder.add_month(args.month)
            finder.add_year(args.year)
        if args.year and args.month is None:
            # no month given: select every month of the requested year(s)
            finder.add_year(args.year)
            for month in range(1, 13):
                finder.add_month([month])

    # Start from an empty list so that an option combination matching none of the
    # branches below (for example --month on its own) is reported as
    # "no files selected" instead of failing on an unbound variable
    l1b_file_list: List[str] = []

    if args.file:
        l1b_file_list = [args.file]
    elif args.start_date and args.end_date:
        if args.dir:
            config.setdefault("l1b_file_finder", {})["base_dir"] = args.dir
        l1b_file_list = _find_l1b_files_with_finders(
            finder_list, config, log, not bool(args.recursive), _select_date_range
        )
    elif args.year:
        if args.dir:
            config.setdefault("l1b_file_finder", {})["base_dir"] = args.dir
        l1b_file_list = _find_l1b_files_with_finders(
            finder_list, config, log, not bool(args.recursive), _select_year_month
        )
    elif args.dir and not args.recursive:  # flat directory search
        l1b_file_list = glob.glob(os.path.join(args.dir, "CR?_IR_1B*.[nN][cC]"))
    elif args.dir and args.recursive:  # recursive search
        l1b_file_list = glob.glob(
            os.path.join(args.dir, "**", "CR?_IR_1B*.[nN][cC]"), recursive=True
        )

    if args.imode:
        l1b_file_list = filter_files_by_instrument_mode_substrings(l1b_file_list, args.imode)

    if args.pmode:
        l1b_file_list = filter_files_by_processing_mode_substrings(l1b_file_list, args.pmode)

    if args.max_files:
        if len(l1b_file_list) > args.max_files:
            l1b_file_list = l1b_file_list[: args.max_files]

    n_l1b_files = len(l1b_file_list)

    # Check if we have any L1b files to process
    if n_l1b_files < 1:
        log.error("No L1b files selected..")
        sys.exit(1)

    # ----------------------------------------------------------------------------------------
    # Check that the L1b file list contains at least 1 existing file
    # ----------------------------------------------------------------------------------------

    if not any(os.path.isfile(l1b_file) for l1b_file in l1b_file_list):
        log.error("No input files in list exist, please check L1b input directories and files")
        sys.exit(1)

    log.info("Total number of L1b file found:  %d", n_l1b_files)
    if args.conf_opts:
        log.info("additional config options from command line are: %s", args.conf_opts)

    # --------------------------------------------------------------------
    # Run the chain on the file list
    # --------------------------------------------------------------------

    if config["chain"]["stop_on_error"]:
        log.warning("**Chain configured to stop on first error**")

    start_time = time.time()

    if args.breakpoint_after:
        if args.breakpoint_after not in algorithm_list:
            log.error(
                "breakpoint algorithm %s not in algorithm list (check the name is correct)",
                args.breakpoint_after,
            )
            sys.exit(1)

    # NOTE(review): only the command line breakpoint is passed on; a breakpoint name read
    # from the algorithm list file (breakpoint_alg_name) is logged above but not used
    # here - confirm whether that is intended.
    _, number_errors, num_files_processed, num_skipped, breakpoint_filename, _ = run_chain(
        l1b_file_list, config, algorithm_list, log, args.breakpoint_after
    )

    elapsed_time = time.time() - start_time

    # --------------------------------------------------------------------
    # Log chain summary stats
    # --------------------------------------------------------------------

    log.info("\n%sChain Run Summary          %s", "-" * 20, "-" * 20)

    log.info(
        "%s chain completed in %.2f seconds := (%.2f mins := %.2f hours)",
        args.name,
        elapsed_time,
        elapsed_time / 60.0,
        (elapsed_time / 60.0) / 60.0,
    )
    log.info(
        "%s chain processed %d L1b files of %d. %d files skipped, %d errors",
        args.name,
        num_files_processed,
        len(l1b_file_list),
        num_skipped,
        number_errors,
    )

    log.info("\n%sLog Files          %s", "-" * 20, "-" * 20)

    log.info("log file (INFO): %s", config["log_files"]["info_name"])
    log.info("log file (ERRORS): %s", config["log_files"]["error_name"])
    if args.debug:
        log.info("log file (DEBUG): %s", config["log_files"]["debug_name"])

    if config["chain"]["use_multi_processing"]:
        # sort .mp log files by filenum processed (as they will be jumbled)
        log.info("Sorting multi-processing error log file...")
        sort_file_by_number(config["log_files"]["error_name"] + ".mp")
        log.info("Sorting multi-processing info log file...")
        sort_file_by_number(config["log_files"]["info_name"] + ".mp")
        if args.debug:
            log.info("Sorting multi-processing debug log file...")
            sort_file_by_number(config["log_files"]["debug_name"] + ".mp")

        log.info("merging log files...")
        # put .mp log contents into main log file
        insert_txtfile1_in_txtfile2_after_line_containing_string(
            config["log_files"]["info_name"] + ".mp",
            config["log_files"]["info_name"],
            "MP processing completed with outputs logged:",
        )
        if args.debug:
            insert_txtfile1_in_txtfile2_after_line_containing_string(
                config["log_files"]["debug_name"] + ".mp",
                config["log_files"]["debug_name"],
                "MP processing completed with outputs logged:",
            )
        append_file(config["log_files"]["error_name"] + ".mp", config["log_files"]["error_name"])

        # remove all the .mp temporary log files
        for file_path in [
            config["log_files"]["error_name"] + ".mp",
            config["log_files"]["info_name"] + ".mp",
            config["log_files"]["debug_name"] + ".mp",
        ]:
            try:
                # Check if the file exists
                if os.path.exists(file_path):
                    # Delete the file
                    os.remove(file_path)
            except OSError as exc:
                log.error("Error occurred while deleting the file %s : %s", file_path, exc)
    else:
        # remove the multi-processing marker string '[fN]' from log files
        remove_strings_from_file(config["log_files"]["info_name"])
        remove_strings_from_file(config["log_files"]["error_name"])
        if args.debug:
            remove_strings_from_file(config["log_files"]["debug_name"])

    log.info("\n%sConfig Files          %s", "-" * 20, "-" * 20)
    log.info("Run config: %s", config_file)
    log.info("Chain config: %s", chain_config_file)
    log.info("Algorithm list file: %s", alg_list_file)

    for mod_args in modified_args:
        log.info("cmdline overide: %s", mod_args)
    if len(breakpoint_filename) > 0:
        log.info("breakpoint file name: %s", breakpoint_filename)

    # print a final message to terminal to indicate red (errors) or green(no errors)
    red = "\033[91m"
    green = "\033[92m"
    endc = "\033[0m"  # Reset to default color
    if number_errors == 0:
        print(f'{green}{config["chain"]["chain_name"]} chain ran with no errors :){endc}')
        sys.exit(0)
    else:
        print(
            f"{red}{number_errors} error(s) encountered running "
            f'{config["chain"]["chain_name"]} chain. '
            f'See error log {config["log_files"]["error_name"]}{endc}'
        )
        sys.exit(1)


# Run the command line tool only when this file is executed as a script
# (importing the module does not start a chain run).
if __name__ == "__main__":
    main()
