#!/usr/bin/env python3
"""
parse_log.py — Parse an ActivityWatch window-activity log and output JSON.

Usage:
    python scripts/parse_log.py LOG [--categories categories.json]

LOG can be:
  - a specific log file: log/2026-02-24_11-00.log
  - a date prefix:       log/2026-02-24  (merges all log/2026-02-24*.log)

Output (stdout, JSON):
{
  "date": "2026-02-23",
  "hours": [9, 10, ...],
  "hour_data": { "9": {"AI 工具": 7.3, "程式開發": 2.6, ...}, ... },
  "top_titles": { "9": [["Chrome.exe: Claude - ...", 4.6], ...], ... },
  "daily_totals": {"AI 工具": 22.1, ...},
  "uncategorized_pct": 32.4
}
"""

import collections
import glob as glob_module
import json
import os
import re
import sys
from pathlib import Path

# Some consoles default to a legacy code page; force UTF-8 so the Chinese
# category names survive the trip through stdout.
if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
    sys.stdout.reconfigure(encoding="utf-8")

# Window titles that carry no information (desktop shell, task switcher, ...).
NOISE_TITLES = {"", "Program Manager", "工作切換", "Google Chrome", "flameshot"}


def load_categories(path):
    """Load category rules (a list of {"name", "keywords"} dicts) from JSON."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def categorize(app, title, rules):
    """Return the name of the first rule with a keyword found in "app title".

    The catch-all rule "其他" is skipped during matching and is returned as
    the fallback when no other rule matches.
    """
    text = f"{app} {title}"
    for rule in rules:
        if rule["name"] == "其他":
            continue
        if any(kw in text for kw in rule["keywords"]):
            return rule["name"]
    return "其他"


def parse(log_paths, rules):
    """Aggregate one or more log files into a JSON-ready summary dict.

    Args:
        log_paths: a single path (str/Path) or a non-empty list of paths.
        rules: category rules as loaded by load_categories().

    Returns:
        dict with keys "date", "hours", "hour_data", "top_titles",
        "daily_totals", "uncategorized_pct". Durations are minutes
        rounded to one decimal place.
    """
    if isinstance(log_paths, (str, Path)):
        log_paths = [log_paths]

    hour_data = collections.defaultdict(lambda: collections.defaultdict(float))
    hour_titles = collections.defaultdict(lambda: collections.defaultdict(float))
    # Expected line shape: [HH:MM] "event" "app" "title" "seconds"
    pattern = re.compile(
        r'^\[(\d+):(\d+)\] "(\w+)" "([^"]+)" "([^"]*)" "([\d.]+)"'
    )

    for log_path in log_paths:
        with open(log_path, encoding="utf-8") as f:
            for line in f:
                m = pattern.match(line)
                if not m:
                    continue
                hour = int(m.group(1))
                app = m.group(4)
                title = m.group(5).strip()
                duration = float(m.group(6))
                cat = categorize(app, title, rules)
                hour_data[hour][cat] += duration
                # Only meaningful titles with non-trivial duration feed the
                # per-hour top-5 list.
                if title not in NOISE_TITLES and duration >= 2.0:
                    key = f"{app}: {title}"
                    hour_titles[hour][key] += duration

    # Convert seconds → minutes, round to 1 decimal
    result_hours = {}
    for h, cats in hour_data.items():
        result_hours[str(h)] = {c: round(s / 60, 1) for c, s in cats.items()}

    result_titles = {}
    for h, titles in hour_titles.items():
        top5 = sorted(titles.items(), key=lambda x: -x[1])[:5]
        result_titles[str(h)] = [[t, round(s / 60, 1)] for t, s in top5]

    # Daily totals per category (already in minutes)
    daily = collections.defaultdict(float)
    for cats in result_hours.values():
        for c, mins in cats.items():
            daily[c] += mins
    total = sum(daily.values()) or 1  # avoid division by zero on empty logs
    uncategorized_pct = round(daily.get("其他", 0) / total * 100, 1)

    # BUGFIX: the original read the loop variable `log_path` after the loop,
    # so with multiple files the date silently came from the LAST file (and
    # raised NameError for an empty list). Use the first path; main()
    # overrides this with the real date prefix anyway.
    date = Path(log_paths[0]).stem  # e.g. "2026-02-23"

    hours = sorted(int(h) for h in result_hours)
    return {
        "date": date,
        "hours": hours,
        "hour_data": result_hours,
        "top_titles": result_titles,
        "daily_totals": {
            c: round(m, 1)
            for c, m in sorted(daily.items(), key=lambda x: -x[1])
        },
        "uncategorized_pct": uncategorized_pct,
    }


def main():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("log_file")
    parser.add_argument("--categories", default=None)
    args = parser.parse_args()

    # Resolve categories.json: explicit arg, then adjacent to script, then cwd
    cat_path = args.categories
    if not cat_path:
        script_dir = Path(__file__).parent.parent
        candidate = script_dir / "categories.json"
        cat_path = str(candidate) if candidate.exists() else "categories.json"
    rules = load_categories(cat_path)

    log_arg = args.log_file
    if Path(log_arg).is_file():
        log_files = [log_arg]
        date_str = Path(log_arg).stem
    else:
        # Treat the argument as a date prefix and merge all matching logs.
        log_files = sorted(glob_module.glob(f"{log_arg}*.log"))
        if not log_files:
            print(f"Error: no log files found matching '{log_arg}*.log'", file=sys.stderr)
            sys.exit(1)
        date_str = Path(log_arg).name  # e.g. "2026-02-24"

    result = parse(log_files, rules)
    result["date"] = date_str
    print(json.dumps(result, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()