#!/opt/imh-python/bin/python3
'''
The purpose of this script is to preemptively detect abusive email
behavior via analytics from Exim logs.

We will never use this data for marketing or advertising purposes.
We only use this data to help protect our customers and our network.
'''
import argparse
import json
import logging
import os
import socket
import sys
import time
from typing import Dict, Iterator, Optional
import requests
import yaml
import eximparse
from rads import setup_logging, lock, LockError

MAX_BATCH_SIZE_BYTES = 5 * 1024 * 1024  # 5MB
TLS_VERIFY = True
TIMEOUTS = {"connect": 5, "read": 15}
MAX_RETRIES = 3
RETRY_BACKOFF_SECONDS = 2
OUTPUT_LOG_PATH = "/opt/dedrads/mailparse/out/messages.json"
CONFIG_PATH = "/opt/dedrads/mailparse/analytics.yaml"
LOG_FILE_PATH = "/var/log/mail_analytics.log"
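
# Illustrative analytics.yaml layout (assumed; only the two keys below are
# required by load_config, and the values here are placeholders):
#
#   endpoint_url: "https://collector.example.com"
#   bearer_token: "<secret token>"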

def url_for(cfg, route: str) -> str:
    """Construct full URL for given route."""
    return f"{cfg['endpoint_url'].rstrip('/')}/{route.lstrip('/')}"
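
# Example with assumed values:
#   url_for({"endpoint_url": "https://collector.example.com/"}, "/api/dedi")
#   -> "https://collector.example.com/api/dedi"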

def init_logging():
    """Setup logging to /var/log/mail_analytics.log"""
    try:
        setup_logging(LOG_FILE_PATH, chmod=0o640)
        return True
    except OSError as e:
        print(f"Failed to setup logging: {e}", file=sys.stderr)
        return False


def load_config(path: str) -> Dict:
    """Load the YAML config and validate that required keys are present."""
    with open(path, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f) or {}

    # Validate required keys
    for k in ["endpoint_url", "bearer_token"]:
        if not cfg.get(k):
            raise ValueError(f"Missing required config key: {k}")

    return cfg


def read_and_delete_logfile(
    logfile: str, dry_run: bool = False
) -> Iterator[tuple[dict, int]]:
    """Yield (record, byte count) pairs from logfile, then delete it
    (unless dry_run)."""
    if not os.path.exists(logfile):
        return

    with open(logfile, "r", encoding="utf-8", errors="replace") as f:
        last_pos = 0
        while (line := f.readline()) != "":
            pos = f.tell()
            try:
                rec = json.loads(line)
                yield rec, pos - last_pos
            except json.JSONDecodeError:
                print(f"Skipping invalid JSON line: {line}")
            finally:
                last_pos = pos
    if not dry_run:
        os.remove(logfile)
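
# out/messages.json is consumed as JSON Lines: one JSON object per line,
# written by the eximparse step in main(). Record fields depend on eximparse
# and are not assumed here.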

def send_batch(
    buffer: dict[str, list[dict]],
    cfg: Dict,
) -> bool:
    """Send a single batch to the endpoint. Returns True on success, False on failure."""
    try:
        for host, records in buffer.items():
            host_data = {
                "hostname": host,
                "data": records
            }
            post_batch_keyed_dict(
                url_for(cfg, '/api/dedi'),
                token=cfg["bearer_token"],
                host_data=host_data,
            )
        return True
    except Exception:
        logging.exception("Batch POST failed")
        return False
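
# Payload shape per POST to <endpoint_url>/api/dedi, as assembled in
# send_batch() above:
#   {"hostname": "<host>", "data": [<record>, ...]}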


def post_batch_keyed_dict(
    endpoint: str,
    token: str,
    host_data: dict,
) -> None:
    """POST host_data to endpoint with retries; raise RuntimeError once
    MAX_RETRIES attempts are exhausted."""
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    timeout_tuple = (float(TIMEOUTS["connect"]), float(TIMEOUTS["read"]))

    attempt = 0
    while True:
        attempt += 1
        try:
            resp = requests.post(
                endpoint,
                headers=headers,
                json=host_data,
                timeout=timeout_tuple,
                verify=TLS_VERIFY,
            )
            if 200 <= resp.status_code < 300:
                return
            msg = (
                f"HTTP {resp.status_code} from {endpoint}. "
                f"Body: {resp.text[:500]}"
            )
        except requests.RequestException as e:
            msg = f"Request error: {e}"

        if attempt >= MAX_RETRIES:
            raise RuntimeError(
                f"Failed to POST batch after {attempt} attempts: {msg}"
            )
        time.sleep(RETRY_BACKOFF_SECONDS * attempt)
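
# With the defaults above, retries use linear backoff: attempt 1, sleep 2s,
# attempt 2, sleep 4s, attempt 3, then RuntimeError -- roughly 6s of waiting
# worst case, plus up to 20s of connect/read timeout per attempt.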


def get_hostname() -> str:
    """Get hostname from /etc/salt/minion_id"""
    if not os.path.exists("/etc/salt/minion_id"):
        return socket.gethostname()  # fallback to system hostname

    with open("/etc/salt/minion_id", "r", encoding="utf-8") as f:
        return f.read().strip()

def send_no_mail(
    hostname: str,
    endpoint: str,
    token: str,
) -> Optional[bool]:
    '''
    Notify the webserver that we have no data this run.
    Returns True on HTTP 200, False otherwise, None if the request failed.
    '''
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    timeout_tuple = (float(TIMEOUTS["connect"]), float(TIMEOUTS["read"]))
    try:
        resp = requests.post(
            f"{endpoint.rstrip('/')}/no-mail/{hostname}",
            headers=headers,
            timeout=timeout_tuple,
            verify=TLS_VERIFY,
        )
        return resp.status_code == 200
    except requests.RequestException:
        return None


def check_status(
    hostname: str,
    endpoint: str,
    token: str,
) -> Optional[bool]:
    """Ask the upstream server whether we should push data for this host.
    Returns True on HTTP 200, False otherwise, None if the request failed."""
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    timeout_tuple = (float(TIMEOUTS["connect"]), float(TIMEOUTS["read"]))
    try:
        resp = requests.get(
            f"{endpoint.rstrip('/')}/status?host={hostname}",
            headers=headers,
            timeout=timeout_tuple,
            verify=TLS_VERIFY,
        )
        return resp.status_code == 200
    except requests.RequestException:
        return None
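
# main() treats check_status() as tri-state: True = proceed, False = the
# upstream said to come back later (exit 0), None = unreachable (exit 1).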


def process_batches(
    logfile: str,
    host: str,
    cfg: Dict,
    args,
) -> int:
    """Process batches like a stream as they are read"""
    total_bytes = 0
    buffer: dict[str, list[dict]] = {}
    sent = 0
    batch_num = 0

    for rec, byte_count in read_and_delete_logfile(logfile, args.dry_run):
        if host not in buffer:
            buffer[host] = []
        buffer[host].append(rec)
        total_bytes += byte_count

        if total_bytes > MAX_BATCH_SIZE_BYTES:
            batch_num += 1
            count = sum(len(records) for records in buffer.values())
            human_mb = total_bytes / (1024 * 1024)
            
            if args.dry_run:
                print(
                    f"DRY RUN: Would send batch #{batch_num} with {count} items "
                    f"(~{human_mb:.2f} MB) to {cfg['endpoint_url']}"
                )
                logging.info(f"DRY RUN: Batch #{batch_num}: {count} items, ~{human_mb:.2f} MB")
                sent += count
            else:
                logging.info(f"Processing batch #{batch_num}: {count} items, ~{human_mb:.2f} MB")
                if send_batch(buffer, cfg):
                    logging.info(f"Successfully sent batch #{batch_num}")
                    sent += count
                else:
                    logging.error(f"Failed to send batch #{batch_num}")
                    raise RuntimeError(f"Failed to send batch #{batch_num}")
            
            buffer.clear()
            total_bytes = 0

    if buffer:
        batch_num += 1
        count = sum(len(records) for records in buffer.values())
        human_mb = total_bytes / (1024 * 1024)
        
        if args.dry_run:
            print(
                f"DRY RUN: Would send batch #{batch_num} with {count} items "
                f"(~{human_mb:.2f} MB) to {cfg['endpoint_url']}"
            )
            logging.info(f"DRY RUN: Batch #{batch_num}: {count} items, ~{human_mb:.2f} MB")
            sent += count
        else:
            logging.info(f"Processing batch #{batch_num}: {count} items, ~{human_mb:.2f} MB")
            if send_batch(buffer, cfg):
                logging.info(f"Successfully sent batch #{batch_num}")
                sent += count
            else:
                logging.error(f"Failed to send batch #{batch_num}")
                raise RuntimeError(f"Failed to send batch #{batch_num}")

    if sent == 0:
        print("No new lines to push.")
        send_no_mail(host, cfg["endpoint_url"], cfg["bearer_token"])
    else:
        print(
            f"Pushed {sent} record(s) in batches capped at "
            f"{MAX_BATCH_SIZE_BYTES // (1024*1024)} MB to {cfg['endpoint_url']}."
        )
        logging.info(
            f"Successfully completed mailpush: {sent} records sent to "
            f"{cfg['endpoint_url']}"
        )
    return sent
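
# Example: a run that parses ~12 MB of records goes out as three batches --
# two flushed at the ~5 MB cap and one final partial flush.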


def main():
    # Setup logging first
    if not init_logging():
        print(
            "Failed to setup logging, continuing without file logging",
            file=sys.stderr,
        )

    logging.info("Starting dedicated mailpush script")

    parser = argparse.ArgumentParser(
        description="Push parsed mail data for dedicated servers."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Do everything but send data.",
    )
    args = parser.parse_args()

    try:
        cfg = load_config(CONFIG_PATH)
        logging.info(f"Loaded config from {CONFIG_PATH}")
    except Exception as e:
        logging.error(f"Failed to load config from {CONFIG_PATH}: {e}")
        sys.exit(1)
    # Get hostname and check upstream status
    host = get_hostname()
    status = check_status(host, cfg["endpoint_url"], cfg["bearer_token"])
    if not status:
        if status is None:
            logging.error("Could not reach upstream server.")
            sys.exit(1)

        logging.error("Upstream server told us to come back later; "
                      "aborting cleanly.")
        sys.exit(0)

    try:
        # Acquire lock
        with lock("mail_analytics"):
            logging.info("Acquired lock for mail_analytics")

            # Run parser locally
            try:
                eximparse.parse_exim()
            except Exception as e:
                raise RuntimeError(f"Failed to run local parser: {e}") from e

            # Process the parsed logfile for this host
            logfile = OUTPUT_LOG_PATH
            logging.info(f"Processing logfile: {logfile} for host: {host}")

            # Process logfile in streaming fashion
            process_batches(logfile, host, cfg, args)

    except LockError:
        print("Another instance is running; exiting.", file=sys.stderr)
        logging.warning("Could not acquire lock - another instance is running")
        sys.exit(1)


if __name__ == "__main__":
    try:
        main()
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        sys.exit(1)
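
# Typical invocation (assumed; e.g. from cron):
#   /opt/dedrads/mailparse/mail_analytics             # parse and push
#   /opt/dedrads/mailparse/mail_analytics --dry-run   # report without sending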
