# Standard library imports
import atexit
import base64
import configparser
import getpass
import hashlib
import json
import logging
import logging.handlers as handlers
import os
import sqlite3
import sys
import threading
import time
import uuid
from signal import SIGTERM
from string import Template
import argparse
import csv

# Third-party imports
import dateutil.parser
import requests

EXPORTER_VERSION = "1.0.0"

##########################
# User editable parameters
##########################

# When True, the API key is kept in the state database (obfuscated with a
# fixed seed, i.e. reversible by anyone who has this script - it is NOT
# encrypted) so later runs can reuse it. When False, the key must be entered
# on every start/restart and any previously stored copy is deleted.
SAVE_API_KEY = True

# Log export API intervals/timeouts in seconds.
# LOG_POLLING_INTERVAL: pause between export requests (also used as the
#   back-off after an error).
# EXPORT_API_TIMEOUT: timeout passed to each export HTTP request.
# EXPORT_API_LIMIT: value sent as "limit" in each export request body.
LOG_POLLING_INTERVAL = 300 # 5 minutes
EXPORT_API_TIMEOUT = 300 # 5 minutes
EXPORT_API_LIMIT = 2000

# Token related timeouts/intervals in seconds.
# TOKEN_REFRESH_INTERVAL: how often the background thread requests a new token.
# RETRY_MIN/MAX_SLEEP_SECONDS: bounds of the doubling back-off used when a
#   token refresh fails.
# TOKEN_TIMEOUT: timeout passed to each token HTTP request.
TOKEN_REFRESH_INTERVAL = 3000 # 50 minutes
RETRY_MIN_SLEEP_SECONDS = 5
RETRY_MAX_SLEEP_SECONDS = 60
TOKEN_TIMEOUT = 60

#################################
# End of user editable parameters
#################################

#######################
# Local Storage Folders
#######################

# Folder containing this file
SRC_FOLDER = os.path.abspath(os.path.dirname(__file__))  # do not edit

# Folder to save collected log files
OUTPUT_FOLDER = os.path.join(SRC_FOLDER, "logs")  # editable

# Folder where the exporter keeps working data (state database and PID files)
WORK_FOLDER = os.path.join(SRC_FOLDER, "work")  # editable

# Path of the SQLite state database file (state.db) inside WORK_FOLDER
DB_PATH = os.path.join(WORK_FOLDER, "state.db")  # do not edit

# File path schema for collected log storage. The ${...} placeholders are
# filled in first with string.Template; the result is then rendered with
# strftime using each record's eventinsertedtime.
STORAGE_SCHEMA = os.path.join(OUTPUT_FOLDER, "%Y", "%m", "%Y-%m-%d", "${ppcode}_${collection}_%Y-%m-%d-%H.${data_format}")  # user editable

#######################
# Default Fields and Filters
# (applied if no config file is provided)
#######################

# Field names requested from the export API, keyed by "<ppcode>_<collection>".
# A key that is missing here results in an empty field list.
DEFAULT_FIELDS = {
    "SSE_Admin": ["eventTime","udn","upn","dos","apn","dip","tgs","atgs","uag","apd","fnm","uri","msg","cty","reg","rgc","crc"],
    "SSE_CASBAPI": ["fnm","eventTime","fsz","own","apn","stt","act","fld","fid","dptn","link","shd","ofStatus","ofShared","afName","afid","nfFileName","nfid","acd","dpi","tmn","crt","mod","aip","atr","afOwner","afStatus","afLink","afPath"],
    "SSE_CASBInline": ["eventTime","upn","dos","atgs","msg","fnm","apn","dip","cty","reg","crc","apd","tgs","udn","did","ext","fre","toe","esj","bce","cce","ptl","uri","uag","rgc","org","inm","aid","gdn","dpi"],
    "SSE_DLP": ["eventTime","apd","nxd","stp","apn","dlpact","atgs","fnm","upn","udn","csl","kwd","tgs","trt","uri","ftp","ext","sh256","sh1","md5","gdn","did","dos","uag","dip","cty","crc","reg","rgc","sorg","dpi"],
    "SSE_Health": ["eventTime","udn","upn","gdn","dos","apn","tgs","uri","rsc","apd","did","dip","rmd","uag","fnm","fid","stt","cty","reg","rgc","crc","lgt"],
    "SSE_SWG": ["eventTime","did","dvi","dsi","uri","dom","arg","rmd","ptl","ufn","uln","upn","gid","act","dhn","nxd","rbt","sbt","uag","lat","lon","cty","cry","crc","reg","rgc","csl","cct","ref","apn","prt","url","dpi","tsc","tss","trsk"],
    "SSE_ZTNA": ["eventTime","dpi","upn","act","nxd","ptl","ptc","ztyp","apn","gwi","cty","reg","cry","crc","rgc","dos","dhn","mus","did","dom","uri","url","arg","svh","dsi","prt","err","dtc","cnn","zaz","lui","gid","dvi","pvi","lat","lon","ufn","uln","pxy","ttp","snm","csl","con","sbt","rbt"]
}

# Filters sent as the "filters" value of the export request, keyed by
# "<ppcode>_<collection>"; the shape is {field: {operator: [values]}}.
# NOTE(review): "ne" presumably means "not equal", i.e. SWG events whose
# "act" is "Allowed" are excluded - confirm against the export API docs.
DEFAULT_FILTERS = {
    "SSE_SWG": {
        "act": {"ne": ["Allowed"]}
    }
}


class StateDB:
    """Minimal key/value store backed by one table of a SQLite database file.

    Every operation opens its own short-lived connection and closes it again,
    so instances hold no open handles between calls.

    The table name is interpolated directly into the SQL text (SQLite cannot
    bind identifiers), so it must come from trusted code, never from
    untrusted input. Keys and values are always passed as bound parameters.
    """

    def __init__(self, db_path=DB_PATH, table="data", allowed_keys=None):
        """
        Args:
            db_path: path of the SQLite database file.
            table: name of the table holding the key/value pairs.
            allowed_keys: iterable of keys that ``set_key`` accepts. When
                omitted, no key can be written.
        """
        self.db_path = db_path
        self.table = table
        # Own copy per instance: avoids the shared-mutable-default pitfall.
        self.allowed_keys = set(allowed_keys) if allowed_keys is not None else set()

    def create_table(self):
        """Create the backing table if it does not exist yet."""
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:  # commits on success, rolls back on error
                conn.execute(f"CREATE TABLE IF NOT EXISTS {self.table} (key text primary key, value text)")
        finally:
            # The connection context manager does not close the connection.
            conn.close()

    def get_key(self, key):
        """Return the stored value for ``key``.

        Returns None when the key is absent or when the query fails (for
        example because the table has not been created yet).
        """
        conn = sqlite3.connect(self.db_path)
        try:
            row = conn.execute(f"SELECT value FROM {self.table} WHERE key=?", (key,)).fetchone()
        except sqlite3.Error:
            return None
        finally:
            conn.close()
        return row[0] if row else None

    def delete_key(self, key):
        """Delete ``key`` if present; database errors are logged, not raised."""
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:
                conn.execute(f"DELETE FROM {self.table} WHERE key=?", (key,))
        except sqlite3.Error:
            # Deleting a missing key is not an error in SQLite; reaching this
            # branch means the statement itself failed (e.g. missing table).
            logger.debug("could not delete key from db: %s", key, exc_info=True)
        finally:
            conn.close()

    def set_key(self, key, value):
        """Insert or replace ``key`` with ``value``.

        Raises:
            RuntimeError: if ``key`` is not one of the allowed keys.
        """
        if key not in self.allowed_keys:
            raise RuntimeError(f"Bad key: {key}")
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:
                conn.execute(f"INSERT OR REPLACE INTO {self.table} (key, value) values (?, ?)", (key, value))
        finally:
            conn.close()


class TokenManager:
    """Obtain a bearer token from the portal and refresh it in the background.

    ``start_token_generation`` fetches the first token synchronously and then
    starts a daemon thread that requests a new token every
    ``refresh_interval`` seconds, retrying with a doubling back-off bounded by
    ``retry_min_sleep_seconds`` and ``retry_max_sleep_seconds``.
    """

    def __init__(self, portal_host, api_key, retry_min_sleep_seconds=RETRY_MIN_SLEEP_SECONDS, retry_max_sleep_seconds=RETRY_MAX_SLEEP_SECONDS, refresh_interval=TOKEN_REFRESH_INTERVAL):
        self.api_key = api_key
        self.portal_host = portal_host
        self.session = requests.Session()
        self.url = f"https://{self.portal_host}/api/apikeys/token"

        self.retry_min_sleep_seconds = retry_min_sleep_seconds
        self.retry_max_sleep_seconds = retry_max_sleep_seconds
        self.refresh_interval = refresh_interval

        # The token is written by the refresh thread and read by the log
        # fetching loop, so every access goes through token_lock.
        self.token = None
        self._thread = None
        self.token_lock = threading.Lock()
        self._stop_event = threading.Event()

    def get_token(self):
        """Return the most recently created token (None before the first one)."""
        with self.token_lock:
            return self.token

    def start_token_generation(self):
        """Create the first token, then start the background refresh thread.

        Raises whatever ``initial_token_generation`` raises when the first
        token cannot be created; no thread is started in that case.
        """
        self.initial_token_generation()
        if self.get_token() is None:
            return
        self._thread = threading.Thread(target=self.periodic_token_regeneration, daemon=True)
        self._thread.start()

    def stop_token_generation(self):
        """Ask the refresh thread to stop and wait up to 5 seconds for it."""
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=5)

    def create_token(self, timeout=TOKEN_TIMEOUT):
        """Request a new token, store it and return it.

        Args:
            timeout: timeout in seconds passed to the HTTP request.

        Raises:
            KeyError: the response JSON has no "token" entry.
            Exception: any request, HTTP status or JSON decoding error.
        """
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Accept": "application/json",
            "X-API-KEY": self.api_key
        }
        try:
            response = self.session.post(self.url, headers=headers, timeout=timeout)
            # Surface 4xx/5xx responses as HTTP errors instead of a confusing
            # KeyError/JSON error from the error body.
            response.raise_for_status()
            new_token = response.json()["token"]
        except KeyError:
            logger.error("Invalid token response.")
            raise
        except Exception:
            logger.error("Failed to create token")
            raise
        with self.token_lock:
            self.token = new_token
        logger.info("Token created successfully.")
        return new_token

    def initial_token_generation(self):
        """Create the first token synchronously, logging and re-raising failures."""
        try:
            self.create_token()
        except requests.exceptions.Timeout:
            logger.exception("Token creation request timed out after 60 seconds")
            raise
        except Exception:
            logger.exception("Failed to create token within 60 seconds")
            raise

    def periodic_token_regeneration(self):
        """Thread body: refresh the token every ``refresh_interval`` seconds."""
        # Event.wait returns True as soon as stop is requested, so shutdown is
        # not delayed by a full refresh interval and no token is requested
        # after stop.
        while not self._stop_event.wait(self.refresh_interval):
            self.create_token_with_retries()

    def create_token_with_retries(self):
        """Create a token, retrying with a doubling back-off until it succeeds.

        Gives up early (without raising) if a stop is requested while waiting
        between attempts.
        """
        interval_in_seconds = self.retry_min_sleep_seconds
        while True:
            try:
                self.create_token()
                return
            except Exception:
                logger.exception("Retrying token creation after error")
                if self._stop_event.wait(interval_in_seconds):
                    return
                interval_in_seconds = min(interval_in_seconds * 2, self.retry_max_sleep_seconds)


class Daemon:
    """Double-fork UNIX daemon wrapper around ``collect_logs``.

    Running state is tracked with a PID file; per-collection settings and
    progress live in ``state_db`` and the exporter-wide instance id in
    ``global_state_db`` (both tables of the same SQLite file).
    """

    def __init__(self, 
                 pid_file,  # nosec
                 force_date=None,
                 portal_host=None,
                 insights_host=None,
                 storage_schema=None,
                 api_key=None,
                 ppcode=None,
                 collection=None,
                 config_file=None,
                 data_format=None,
                 output_folder=OUTPUT_FOLDER,
                 work_folder=WORK_FOLDER,
                 db_path=DB_PATH,
                 table=None,
                 stdin="/dev/null", 
                 stdout="/dev/null", 
                 stderr="/dev/null"):
        self.pid_file = pid_file
        self.force_date = force_date
        self.portal_host = portal_host
        self.insights_host = insights_host
        self.storage_schema = storage_schema
        self.api_key = api_key
        self.ppcode = ppcode
        self.collection = collection
        self.config_file = config_file
        self.data_format = data_format
        self.output_folder = output_folder
        self.work_folder = work_folder
        self.db_path = db_path
        self.table = table
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        allowed_keys = {"stop_daemon", "format", "insights_host",
                        "portal_host", "ppcode", "collection",
                        "offset", "last_inserted_time", "api_key"}
        self.state_db = StateDB(db_path=self.db_path, table=self.table, allowed_keys=allowed_keys)
        self.global_state_db = StateDB(db_path=self.db_path, table="global_data", allowed_keys={"instance_id"})

    def _read_pid(self):
        """Return the PID stored in the PID file, or None if missing/unreadable."""
        try:
            with open(self.pid_file, "r") as pid_handle:
                return int(pid_handle.read().strip())
        except (IOError, ValueError):
            # ValueError: empty or corrupt PID file; treat like "no PID".
            return None

    def daemonize(self):
        """Detach from the terminal (double fork) and write the PID file."""
        try:
            pid = os.fork()
            if pid > 0:
                sys.exit(0)  # first parent exits
        except OSError as e:
            sys.stderr.write(f"fork #1 failed: {e.errno} ({e.strerror})\n")
            sys.exit(1)
        os.chdir("/")
        os.setsid()
        # NOTE(review): umask 0 means no permission bits are masked for files
        # created afterwards (logs, state database) - confirm this is intended.
        os.umask(0)
        try:
            pid = os.fork()
            if pid > 0:
                sys.exit(0)  # second parent exits
        except OSError as e:
            sys.stderr.write(f"fork #2 failed: {e.errno} ({e.strerror})\n")
            sys.exit(1)
        sys.stdout.flush()
        sys.stderr.flush()
        # Redirect the standard streams to the configured files.
        si = open(self.stdin, "r")
        so = open(self.stdout, "a+")
        se = open(self.stderr, "a+", buffering=1)
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())
        atexit.register(self.delpid)
        pid = str(os.getpid())
        with open(self.pid_file, "w+") as pid_handle:
            pid_handle.write(f"{pid}\n")

    def delpid(self):
        """Remove the PID file if it exists."""
        if os.path.isfile(self.pid_file):
            os.remove(self.pid_file)

    def start(self):
        """Daemonize and run, unless a PID file says a daemon already exists."""
        pid = self._read_pid()
        if pid:
            sys.stderr.write(f"PID File ({self.pid_file}) already exists. Daemon already running?\n")
            sys.exit(1)
        self.daemonize()
        self.run()

    def stop(self, verbose=True):
        """Send SIGTERM to the recorded PID until the process is gone.

        Args:
            verbose: when True, report a missing PID file on stderr.
        """
        pid = self._read_pid()
        if not pid:
            if verbose:
                sys.stderr.write(f"\nPID file ({self.pid_file}) does not exist.\nDaemon not running?\n\n")
            return
        try:
            # Keep signalling until os.kill reports the process no longer exists.
            while True:
                os.kill(pid, SIGTERM)
                time.sleep(0.1)
        except ProcessLookupError:
            # ESRCH: the process has exited; clean up its PID file.
            self.delpid()
        except OSError as err:
            logger.error(str(err))
            sys.exit(1)
        logger.info("Daemon stopped.")

    def restart(self):
        """Stop a running daemon (quietly) and start a new one."""
        self.stop(verbose=False)
        self.start()

    def removePIDFile(self):
        """Remove the PID file if it exists (alias of ``delpid``)."""
        self.delpid()

    def run(self):
        """Run ``collect_logs`` with this daemon's settings.

        Any exception is logged and re-raised. In all cases the PID file is
        removed and the "stop_daemon" message stored in the state database
        (if any) is logged.
        """
        try:
            collect_logs(
                force_date=self.force_date,
                portal_host=self.portal_host,
                insights_host=self.insights_host,
                storage_schema=self.storage_schema,
                state_db=self.state_db,
                global_state_db=self.global_state_db,
                api_key=self.api_key,
                ppcode=self.ppcode,
                collection=self.collection,
                output_folder=self.output_folder,
                config_file=self.config_file,
                data_format=self.data_format,
            )
        except Exception:
            logger.exception("Daemon crashed")
            raise
        finally:
            # No bare ``raise`` here: an in-flight exception keeps propagating
            # on its own, and with none active a bare raise would itself fail.
            self.removePIDFile()
            error_message = self.state_db.get_key("stop_daemon")
            if error_message:
                logger.error(error_message)
            logger.error("Daemon stopped.")


def get_logger(
        log_file, 
        logger_name, 
        log_level=logging.DEBUG, 
        backup_count=3,
        utc=True, 
        interval_minutes=24*60
    ):
    """Return the named logger, attaching a rotating file handler on first use.

    Args:
        log_file: path of the log file.
        logger_name: name passed to ``logging.getLogger``.
        log_level: level set on the logger (on every call).
        backup_count: number of rotated files to keep.
        utc: whether rotation times are computed in UTC.
        interval_minutes: rotation interval in minutes.

    Returns:
        The configured ``logging.Logger``. If the logger already has a
        handler, it is returned as is and ``log_file`` is not opened.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(log_level)
    if logger.handlers:
        # Already configured: do not open (and then discard) another handler.
        return logger
    handler = handlers.TimedRotatingFileHandler(
        log_file,
        when="M",
        interval=interval_minutes,
        backupCount=backup_count,
        encoding=None,
        utc=utc
    )
    formatter = logging.Formatter(
        "%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s",
        datefmt="%Y%m%d%H%M%S"
    )
    # Timestamps in the log lines are always rendered in UTC.
    formatter.converter = time.gmtime
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger


def _ch_gen(start):
    """Coroutine-style generator producing one mask byte per step.

    Each step hashes the running seed with SHA-256, picks the digest byte at
    the current position (wrapping around the digest length) and XORs it with
    the byte most recently sent in (0 for the first step). The sent byte is
    then appended to the seed for the next step.
    """
    seed = start
    position = 0
    previous = 0
    while True:
        digest = hashlib.sha256(seed).digest()
        previous = yield digest[position % len(digest)] ^ previous
        seed += bytes((previous,))
        position += 1


def obfuscate(text, encode, start=b"simple"):
    """XOR every byte of ``text`` with a mask stream seeded by ``start``.

    Args:
        text: bytes to transform.
        encode: truthy to obfuscate plain bytes, falsy to reverse a previous
            obfuscation (the mask stream is always fed the plain byte).
        start: seed bytes for the mask stream.

    Returns:
        The transformed bytes, same length as ``text``.
    """
    masks = _ch_gen(start)
    mask = next(masks)
    output = bytearray()
    for byte in text:
        mixed = byte ^ mask
        output.append(mixed)
        feedback = byte if encode else mixed
        mask = masks.send(feedback)
    return bytes(output)


def force_new_date(state_db, start_date):
    """Reset the collection position to ``start_date``.

    Stores the date (normalised to "%Y-%m-%d %H:%M:%S") as
    "last_inserted_time" and resets "offset" to 0.

    Args:
        state_db: state store exposing ``set_key``.
        start_date: date string understood by ``dateutil.parser.parse``.

    Returns:
        True when the state was updated, False when ``start_date`` could not
        be parsed (the state is left untouched and a warning is logged).
    """
    try:
        date_string = dateutil.parser.parse(start_date).strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        # Previously swallowed silently; leave a trace for troubleshooting.
        logger.warning("Ignoring unparseable start date: %r", start_date, exc_info=True)
        return False
    logger.debug(f"Forcing new start date: {start_date}")
    state_db.set_key("last_inserted_time", date_string)
    state_db.set_key("offset", 0)
    return True


def fetch_api_logs(insights_host, ppcode, collection, fields, filters, start_time, offset, token, data_format, instance_id):
    """POST one export request and return the records it yields.

    Args:
        insights_host: host name of the export API.
        ppcode, collection: identify the export endpoint.
        fields: field names to request.
        filters: filter mapping sent unchanged as "filters".
        start_time: value sent as "starttime".
        offset: value sent as "offset".
        token: bearer token for the Authorization header.
        data_format: "json" for JSON; any other value requests CSV.
        instance_id: value sent as "instanceId".

    Returns:
        For "json": the list under "results" in the response body (empty list
        if absent). Otherwise: the response text split into lines, which for
        CSV includes the header line.

    Raises:
        RuntimeError: the API answered with a status other than 200.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
        "Accept": "application/json" if data_format == "json" else "application/csv"
    }
    data = {
        "starttime": start_time,
        "offset": offset,
        "limit": EXPORT_API_LIMIT,
        "instanceId": instance_id,
        "fields": [{"name": f} for f in fields],
        "filters": filters,
    }
    export_url = f"https://{insights_host}/api/export/{ppcode}/{collection}"
    logger.debug("Fetching logs from API: %s with config: %s", export_url, data)
    response = requests.post(export_url, headers=headers, json=data, timeout=EXPORT_API_TIMEOUT)
    if response.status_code != 200:
        logger.error("unable to connect. url: %s, response: %s", export_url, response.text)
        raise RuntimeError(f"Log API request failed with status {response.status_code}: {response.text}")
    if data_format == "json":
        return response.json().get("results", [])
    # Every non-JSON format was requested as CSV (see the Accept header), so
    # parse it as CSV instead of leaving the result undefined.
    # NOTE(review): splitting on line breaks assumes no CSV field contains an
    # embedded newline - confirm against the API.
    return response.text.strip().splitlines()


def get_default_config(ppcode, collection):
    """Return the built-in (fields, filters) for ``<ppcode>_<collection>``.

    Both values are independent copies, so callers may modify them without
    altering DEFAULT_FIELDS / DEFAULT_FILTERS. Unknown combinations yield an
    empty list and an empty dict.
    """
    import copy  # local import: only needed here

    config_key = f"{ppcode}_{collection}"
    fields = list(DEFAULT_FIELDS.get(config_key, []))
    # The filters are nested dicts/lists, so a shallow copy would still share
    # the inner objects with the module-level default.
    filters = copy.deepcopy(DEFAULT_FILTERS.get(config_key, {}))
    return fields, filters


def load_ini_config(config_path, ppcode, collection):
    """Read custom fields and filters for one collection from an INI file.

    Recognised sections (option names keep their case):
      * ``[<ppcode>_<collection>_fields]`` with a comma-separated ``fields``
        option.
      * ``[<ppcode>_<collection>_filter_<name>]`` where each option maps an
        operator to a comma-separated list of values.

    Args:
        config_path: path of the INI file (read as UTF-8).
        ppcode, collection: select which sections apply.

    Returns:
        ``(fields, filters)``: a list of field names and a dict of the form
        ``{name: {operator: [values]}}``. Empty entries are dropped.

    Raises:
        OSError: the file is missing or cannot be opened.
        configparser.Error: the file is not valid INI.
    """
    config = configparser.ConfigParser()
    config.optionxform = str  # preserve case
    # ConfigParser.read() silently skips files it cannot open, which would
    # turn a wrong path into an empty configuration; open explicitly instead.
    with open(config_path, encoding="utf-8") as config_handle:
        config.read_file(config_handle)

    fields = []
    fields_section = f"{ppcode}_{collection}_fields"
    if fields_section in config:
        fields_str = config[fields_section].get("fields", "")
        fields = [f.strip() for f in fields_str.split(",") if f.strip()]

    filters = {}
    filter_prefix = f"{ppcode}_{collection}_filter_"
    for section in config.sections():
        if not section.startswith(filter_prefix):
            continue
        filter_key = section[len(filter_prefix):]
        filters[filter_key] = {
            operator: [item.strip() for item in raw.split(",") if item.strip()]
            for operator, raw in config[section].items()
        }

    return fields, filters


def generate_logs(token_manager: TokenManager, insights_host, state_db: StateDB, ppcode, collection, fields, filters, data_format, instance_id):
    """Poll the export API forever and yield every received record.

    The position ("last_inserted_time" and "offset") is read from
    ``state_db`` at start and written back after each batch has been
    yielded completely.

    Yields:
        ``(headers, line, eventinsertedtime)`` tuples. For CSV, ``headers``
        is the raw header line and ``line`` the raw record line; for JSON,
        ``headers`` is None and ``line`` the record serialised with
        ``json.dumps``. ``eventinsertedtime`` is the parsed datetime.

    Raises:
        RuntimeError: no start time is stored, or a record has a missing or
            unparseable "eventinsertedtime".
    """
    stored_offset = state_db.get_key("offset")
    offset = int(stored_offset) if stored_offset else 0
    last_inserted_time = state_db.get_key("last_inserted_time")
    if not last_inserted_time:
        state_db.set_key("stop_daemon", "Please specify start date using the -d option.")
        raise RuntimeError("No start date specified or found in DB")
    while True:
        # Fetch logs from API
        token = token_manager.get_token()
        results = fetch_api_logs(insights_host, ppcode, collection, fields, filters, last_inserted_time, offset, token, data_format, instance_id)
        if not results:
            logger.info("No new logs received from API")
            time.sleep(LOG_POLLING_INTERVAL)
            continue

        # Parse eventinsertedtime of every record
        headers = None
        parsed_rows = []
        if data_format == "csv":
            headers = results[0]
            results = results[1:]
            dict_reader = csv.DictReader(results, fieldnames=next(csv.reader([headers])))
            for row_dict, raw_line in zip(dict_reader, results):
                event_time = row_dict.get("eventinsertedtime")
                if not event_time:
                    raise RuntimeError(f"eventinsertedtime not found in CSV row: {row_dict}")
                try:
                    event_time_dt = dateutil.parser.parse(event_time)
                except Exception as exc:
                    raise RuntimeError(f"Failed to parse eventinsertedtime '{event_time}' in CSV row: {row_dict}") from exc
                parsed_rows.append((raw_line, event_time_dt))
        else:
            for item in results:
                event_time = item.get("eventinsertedtime")
                if not event_time:
                    raise RuntimeError(f"eventinsertedtime not found in JSON item: {item}")
                try:
                    event_time_dt = dateutil.parser.parse(event_time)
                except Exception as exc:
                    raise RuntimeError(f"Failed to parse eventinsertedtime '{event_time}' in JSON item: {item}") from exc
                parsed_rows.append((item, event_time_dt))

        if not parsed_rows:
            # E.g. a CSV response consisting of the header line only; without
            # this guard max() below would fail on an empty sequence.
            logger.info("No new logs received from API")
            time.sleep(LOG_POLLING_INTERVAL)
            continue
        max_inserted_time = max(event_time_dt for _, event_time_dt in parsed_rows)

        # Write data
        # Every record is yielded. Records stamped with the batch's maximum
        # eventinsertedtime are additionally counted: the next request starts
        # at that same timestamp, and the count becomes its offset
        # (presumably so the API skips the records already delivered -
        # confirm against the export API documentation).
        max_inserted_time_count = 0
        for row, eventinsertedtime in parsed_rows:
            if eventinsertedtime == max_inserted_time:
                max_inserted_time_count += 1
            if data_format == "csv":
                yield headers, row, eventinsertedtime
            elif data_format == "json":
                yield None, json.dumps(row), eventinsertedtime

        new_inserted_time = max_inserted_time.strftime("%Y-%m-%d %H:%M:%S")
        if new_inserted_time == last_inserted_time:
            # Still on the same timestamp: skip what earlier batches delivered too.
            offset += max_inserted_time_count
        else:
            offset = max_inserted_time_count
        last_inserted_time = new_inserted_time
        logger.debug("Fetched %s items, max eventinsertedtime %s, next offset %s", len(results), max_inserted_time, offset)
        state_db.set_key("last_inserted_time", last_inserted_time)
        state_db.set_key("offset", offset)
        time.sleep(LOG_POLLING_INTERVAL)


def get_or_create_instance_id(global_state_db: StateDB):
    """Return the stored instance id, generating and saving one if absent.

    A new id has the form ``script-<uuid4>`` and is stored under the
    "instance_id" key of ``global_state_db``.
    """
    stored_id = global_state_db.get_key("instance_id")
    if stored_id:
        logger.debug(f"Using existing instance ID: {stored_id}")
        return stored_id

    generated_id = "script-" + str(uuid.uuid4())
    global_state_db.set_key("instance_id", generated_id)
    logger.info(f"Generated new instance ID: {generated_id}")
    return generated_id


def collect_logs(force_date, portal_host, insights_host, storage_schema, state_db: StateDB, global_state_db: StateDB, api_key, ppcode, collection, output_folder, config_file, data_format):
    """Resolve settings, start token handling and write exported logs to disk.

    Settings given as arguments are persisted in ``state_db``; settings that
    are omitted are loaded from there. When a required setting is missing, a
    user-facing hint is stored under "stop_daemon" and RuntimeError is
    raised. After setup the function loops forever, appending every record
    from ``generate_logs`` to the file named by ``storage_schema``.

    Args:
        force_date: optional start date that resets the stored position.
        portal_host: host used to create tokens.
        insights_host: host of the export API.
        storage_schema: path template (string.Template, then strftime).
        state_db: per-collection state store.
        global_state_db: store holding the instance id.
        api_key: API key entered by the user, or None to use the saved one.
        ppcode, collection: identify what to export.
        output_folder: folder created if it does not exist.
        config_file: optional INI file, resolved relative to SRC_FOLDER.
        data_format: "csv" or "json"; None to use the stored format.

    Raises:
        RuntimeError: a required setting is missing or invalid.
        Exception: token generation could not be started, or log generation
            failed while a "stop_daemon" message is set.
    """
    try:
        logger.info("Daemon started.")

        token_manager = None
        state_db.create_table()
        state_db.delete_key("stop_daemon")
        global_state_db.create_table()

        instance_id = get_or_create_instance_id(global_state_db)

        if SAVE_API_KEY:
            if api_key:
                state_db.set_key("api_key", base64.b64encode(obfuscate(api_key.encode(), 1)).decode())
            stored_api_key = state_db.get_key("api_key")
            if stored_api_key:
                try:
                    api_key = obfuscate(base64.b64decode(stored_api_key), 0)
                except Exception as exc:
                    state_db.set_key("stop_daemon", "Failed to obfuscate API key")
                    raise RuntimeError("Failed to obfuscate API key") from exc
            else:
                # Nothing saved: fall through to the "No API Key" hint below
                # instead of failing while decoding None.
                api_key = None
        else:
            # ensure no saved api key
            state_db.delete_key("api_key")
        if not api_key:
            state_db.set_key("stop_daemon", "Please enter api key using the -k or --api-key option.")
            raise RuntimeError("No API Key")

        if data_format:
            state_db.set_key("format", data_format)
        data_format = state_db.get_key("format")
        if not data_format:
            state_db.set_key("stop_daemon", "Please specify the log data format using the -df option.")
            raise RuntimeError("No log data format specified or found in DB")

        # Fill in the ${...} placeholders; the strftime directives stay for later.
        storage_schema = Template(storage_schema).substitute(ppcode=ppcode, collection=collection, data_format=data_format)

        if insights_host:
            state_db.set_key("insights_host", insights_host)
        insights_host = state_db.get_key("insights_host")
        if not insights_host:
            state_db.set_key("stop_daemon", "Please specify the Insights host using the -i option.")
            raise RuntimeError("No Insights host specified or found in DB")

        if portal_host:
            state_db.set_key("portal_host", portal_host)
        portal_host = state_db.get_key("portal_host")
        if not portal_host:
            state_db.set_key("stop_daemon", "Please specify the portal host using the -p option.")
            raise RuntimeError("No Portal host specified or found in DB")

        fields = []
        filters = {}
        if config_file:
            try:
                config_path = os.path.join(SRC_FOLDER, config_file)
                fields, filters = load_ini_config(config_path, ppcode, collection)
                logger.info("Using configuration from file: %s", config_file)
            except Exception as exc:
                state_db.set_key("stop_daemon", "Failed to read or parse config file.")
                raise RuntimeError("Failed to read or parse config file") from exc
        else:
            # Use default configuration
            fields, filters = get_default_config(ppcode, collection)
            logger.info("Using default configuration for %s_%s", ppcode, collection)

        state_db.set_key("ppcode", ppcode)
        ppcode = state_db.get_key("ppcode")
        state_db.set_key("collection", collection)
        collection = state_db.get_key("collection")

        if force_date:
            force_new_date(state_db, force_date)

        os.makedirs(output_folder, exist_ok=True)

        # Start token generation using API key
        logger.info("Starting token generation...")
        token_manager = TokenManager(portal_host, api_key)
        try:
            token_manager.start_token_generation()
        except Exception:
            state_db.set_key("stop_daemon", "Failed to start token generation.")
            raise

        while True:
            try:
                for headers, log_line, timestamp in generate_logs(token_manager, insights_host, state_db, ppcode, collection, fields, filters, data_format, instance_id):
                    file_path = timestamp.strftime(storage_schema)
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    try:
                        with open(file_path, "a", encoding="utf8", newline="") as f:
                            # The file exists once opened, so an empty file is
                            # the only case that still needs the CSV header.
                            if data_format == "csv" and headers and os.path.getsize(file_path) == 0:
                                f.write(headers + "\n")
                            f.write(log_line + "\n")
                    except Exception:
                        logger.exception("Could not write: %s", repr(log_line))
            except Exception:
                logger.exception("Error occurred while generating logs")
                # A stored "stop_daemon" message marks the error as fatal;
                # anything else is retried after the polling interval.
                if state_db.get_key("stop_daemon"):
                    raise
                time.sleep(LOG_POLLING_INTERVAL)
    finally:
        if token_manager:
            token_manager.stop_token_generation()


if __name__ == "__main__":
    # Command line entry point: parse options, set up logging, then start,
    # restart or stop the daemon for the selected collection.
    parser = argparse.ArgumentParser(
        description=("Insights Log Export Script.\n\n"
                    f"  Script version: {EXPORTER_VERSION}\n"
                    f"  Configuration:\n"
                    f"    - Logs collected in: {OUTPUT_FOLDER}\n"
                    f"    - Work folder: {WORK_FOLDER}\n"
                    f"  Examples:\n"
                    f"    1. Sign in and start collecting logs from specified date using an API key (with default fields/filters):\n"
                    f"    python3 {sys.argv[0]} -a start -d '2025-01-01 00:00:00' -pp SSE -c SWG -p portal.dev.forcepointone.com -i tenant.insights.dev.forcepointone.com -df json -k\n\n"
                    f"    2. Sign in and start collecting logs with custom config file:\n"
                    f"    python3 {sys.argv[0]} -a start -d '2025-01-01 00:00:00' -pp SSE -c SWG -p portal.dev.forcepointone.com -i tenant.insights.dev.forcepointone.com -f config.ini -df json -k\n\n"
                    f"    3. Restart with previous credentials and state:\n"
                    f"    python {sys.argv[0]} -a restart -pp SSE -c SWG\n\n"
                    f"    4. Stop collecting logs:\n"
                    f"    python {sys.argv[0]} -a stop -pp SSE -c SWG"),
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("-a", "--action", choices=["start", "stop", "restart"],
                        dest="action", required=True, help="start/stop/restart daemon")
    parser.add_argument("-pp", "--ppcode", choices=["SSE"],
                        dest="ppcode", required=False, help="ppcode of collection", default="SSE")
    parser.add_argument("-c", "--collection", choices=["Admin", "CASBAPI", "CASBInline", "DLP", "Health", "SWG", "ZTNA"],
                        dest="collection", required=True, help="name of collection")
    parser.add_argument("-d", "--date",
                        dest="date", type=str, required=False, default="", help="ISO-formatted start date (UTC),\ne.g., '2020-01-01 00:00:00' or '2020-01-01T00:00:00Z'")
    parser.add_argument("-k", "--api-key",
                        dest="api_key", action="store_true", required=False, help="set API key")
    parser.add_argument("-p", "--portal-host", 
                        dest="portal_host", required=False, help=("portal hostname e.g. portal.forcepointone.com"))
    parser.add_argument("-i", "--insights-host", 
                        dest="insights_host", required=False, help=("insights base url e.g. <tenant>.insights.forcepointone.com"))
    # NOTE(review): because of default="csv" a format is always passed on, so
    # a restart without -df overwrites a previously stored "json" - confirm
    # whether the stored format should win instead.
    parser.add_argument("-df", "--data-format", choices=["csv", "json"],
                        dest="data_format", required=False, help="log data format", default="csv")
    parser.add_argument("-f", "--config", 
                        dest="config_file", required=False, help=("optional config file for custom fields & filters (uses defaults if not provided)"))
    args = parser.parse_args()

    os.makedirs(WORK_FOLDER, exist_ok=True)

    log_file = os.path.join(SRC_FOLDER, f"insapilogs_{args.ppcode}_{args.collection}.log")
    # Module-level name: the functions and classes above log through it.
    logger = get_logger(log_file, "insights_log_exporter")

    date_string = None
    if args.date:
        try:
            date_string = dateutil.parser.parse(args.date).strftime("%Y-%m-%d %H:%M:%S")
        except Exception:
            # The value needs a %-placeholder; passing it as a bare extra
            # argument makes the logging call itself fail.
            logger.error("Bad date: %r", args.date)
            sys.stderr.write(f"Bad date: {args.date!r}\n")
            sys.exit(-2)

    cmd = args.action
    api_key = None
    if cmd in ["start", "restart"]:
        if args.api_key or not SAVE_API_KEY:
            api_key = getpass.getpass("API Key: ")

    daemon = Daemon(
        pid_file=os.path.join(WORK_FOLDER, f"{args.ppcode}_{args.collection}_inslogs.pid"),
        force_date=date_string, 
        portal_host=args.portal_host, 
        insights_host=args.insights_host, 
        storage_schema=STORAGE_SCHEMA,
        api_key=api_key, 
        ppcode=args.ppcode, 
        collection=args.collection, 
        table=f"{args.ppcode}_{args.collection}_data",
        data_format=args.data_format, 
        config_file=args.config_file
    )

    # "start" deliberately goes through restart(), which first stops a
    # daemon that is already running for this collection.
    if cmd == "start" or cmd == "restart":
        daemon.restart()
    elif cmd == "stop":
        daemon.stop()
    else:
        logger.error("Unknown action: %s", cmd)
        sys.exit(-1)
