#!/usr/bin/env python3
"""
parse_log.py — Parse an ActivityWatch window-activity log and output JSON.

Usage:
    python scripts/parse_log.py LOG [--categories categories.json]

LOG can be:
  - a specific log file: log/2026-02-24_11-00.log
  - a date prefix:       log/2026-02-24  (merges all log/2026-02-24*.log)

Output (stdout, JSON):
{
  "date": "2026-02-23",
  "hours": [9, 10, ...],
  "hour_data": { "9": {"AI 工具": 7.3, "程式開發": 2.6, ...}, ... },
  "top_titles": { "9": [["Chrome.exe: Claude - ...", 4.6], ...], ... },
  "daily_totals": {"AI 工具": 22.1, ...},
  "uncategorized_pct": 32.4
}
"""

import collections
import glob as glob_module
import json
import os
import re
import sys
from pathlib import Path

# Some consoles default to a legacy code page; force UTF-8 so the Chinese
# category names survive the trip through stdout.
if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
    sys.stdout.reconfigure(encoding="utf-8")

# Window titles that carry no information (desktop shell, task switcher, ...).
NOISE_TITLES = {"", "Program Manager", "工作切換", "Google Chrome", "flameshot"}


def load_categories(path):
    """Load category rules (a list of {"name", "keywords"} dicts) from JSON."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def categorize(app, title, rules):
    """Return the name of the first rule with a keyword found in "app title".

    The catch-all rule "其他" is skipped during matching and is returned as
    the fallback when no other rule matches.
    """
    text = f"{app} {title}"
    for rule in rules:
        if rule["name"] == "其他":
            continue
        if any(kw in text for kw in rule["keywords"]):
            return rule["name"]
    return "其他"


def parse(log_paths, rules):
    """Aggregate one or more log files into a JSON-ready summary dict.

    Args:
        log_paths: a single path (str/Path) or a non-empty list of paths.
        rules: category rules as loaded by load_categories().

    Returns:
        dict with keys "date", "hours", "hour_data", "top_titles",
        "daily_totals", "uncategorized_pct". Durations are minutes
        rounded to one decimal place.
    """
    if isinstance(log_paths, (str, Path)):
        log_paths = [log_paths]

    hour_data = collections.defaultdict(lambda: collections.defaultdict(float))
    hour_titles = collections.defaultdict(lambda: collections.defaultdict(float))
    # Expected line shape: [HH:MM] "event" "app" "title" "seconds"
    pattern = re.compile(
        r'^\[(\d+):(\d+)\] "(\w+)" "([^"]+)" "([^"]*)" "([\d.]+)"'
    )

    for log_path in log_paths:
        with open(log_path, encoding="utf-8") as f:
            for line in f:
                m = pattern.match(line)
                if not m:
                    continue
                hour = int(m.group(1))
                app = m.group(4)
                title = m.group(5).strip()
                duration = float(m.group(6))
                cat = categorize(app, title, rules)
                hour_data[hour][cat] += duration
                # Only meaningful titles with non-trivial duration feed the
                # per-hour top-5 list.
                if title not in NOISE_TITLES and duration >= 2.0:
                    key = f"{app}: {title}"
                    hour_titles[hour][key] += duration

    # Convert seconds → minutes, round to 1 decimal
    result_hours = {}
    for h, cats in hour_data.items():
        result_hours[str(h)] = {c: round(s / 60, 1) for c, s in cats.items()}

    result_titles = {}
    for h, titles in hour_titles.items():
        top5 = sorted(titles.items(), key=lambda x: -x[1])[:5]
        result_titles[str(h)] = [[t, round(s / 60, 1)] for t, s in top5]

    # Daily totals per category (already in minutes)
    daily = collections.defaultdict(float)
    for cats in result_hours.values():
        for c, mins in cats.items():
            daily[c] += mins
    total = sum(daily.values()) or 1  # avoid division by zero on empty logs
    uncategorized_pct = round(daily.get("其他", 0) / total * 100, 1)

    # BUGFIX: the original read the loop variable `log_path` after the loop,
    # so with multiple files the date silently came from the LAST file (and
    # raised NameError for an empty list). Use the first path; main()
    # overrides this with the real date prefix anyway.
    date = Path(log_paths[0]).stem  # e.g. "2026-02-23"

    hours = sorted(int(h) for h in result_hours)
    return {
        "date": date,
        "hours": hours,
        "hour_data": result_hours,
        "top_titles": result_titles,
        "daily_totals": {
            c: round(m, 1)
            for c, m in sorted(daily.items(), key=lambda x: -x[1])
        },
        "uncategorized_pct": uncategorized_pct,
    }


def main():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("log_file")
    parser.add_argument("--categories", default=None)
    args = parser.parse_args()

    # Resolve categories.json: explicit arg, then adjacent to script, then cwd
    cat_path = args.categories
    if not cat_path:
        script_dir = Path(__file__).parent.parent
        candidate = script_dir / "categories.json"
        cat_path = str(candidate) if candidate.exists() else "categories.json"
    rules = load_categories(cat_path)

    log_arg = args.log_file
    if Path(log_arg).is_file():
        log_files = [log_arg]
        date_str = Path(log_arg).stem
    else:
        # Treat the argument as a date prefix and merge all matching logs.
        log_files = sorted(glob_module.glob(f"{log_arg}*.log"))
        if not log_files:
            print(f"Error: no log files found matching '{log_arg}*.log'", file=sys.stderr)
            sys.exit(1)
        date_str = Path(log_arg).name  # e.g. "2026-02-24"

    result = parse(log_files, rules)
    result["date"] = date_str
    print(json.dumps(result, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()