#!/usr/bin/env python3
"""中心 dispatcher：从 Gitea runner 上 SSH 拉每个团队成员的 jsonl，生成日报。

被 .gitea/workflows/daily-report.yml 调用。

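Data flow (per user, inside one per-user temporary directory <tmp>):
    1. ssh user@host and tar back that day's ~/.claude/projects/<encoded-workspace>/*.jsonl → <tmp>/jsonl/
    2. stage the files under <tmp>/fake-home/, then run gather.sh locally with OVERRIDE_EMAIL=... SKIP_CC_SESSIONS=1
    3. run extract_signals.py locally with --home <tmp>/fake-home/
    4. run run.py locally with --user --gather-file --signals-file
    5. run.py itself pushes to chore/daily-reports-rolling and posts the weekly aggregate issue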

Users config 来源（按优先级）：
    1. --users-file <path>   显式文件路径
    2. env DAILY_REPORT_USERS_CONFIG   JSON 字符串
    3. scripts/ops/daily-report/users.local.json   本地配置（gitignored）

Config 格式（JSON 数组）：
    [
      {
        "name": "chentao-jia",                                # 必填，用于路径 / git user / Gitea label
        "email": "chentao.jia@ff.com",                       # 必填，git log --author 过滤
        "ssh_host": "chentao@10.x.x.x",                      # 必填，ssh target；本机时写 "@local"
        "workspace_path": "~/Code/workspace"                  # 选填，默认 ~/Code/workspace
      },
      ...
    ]

Fail-soft：任一 user 失败 log + skip + 继续下一个；总 exit 0 除非全失败。
"""
from __future__ import annotations

import argparse
import datetime as dt
import json
import os
import shlex
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

# Directory that contains this script.
SCRIPT_DIR = Path(__file__).resolve().parent
# Three levels above SCRIPT_DIR; this is the repository root given the
# scripts/ops/daily-report/ location named in the module docstring.
REPO_ROOT = SCRIPT_DIR.parents[2]
# Sibling tools invoked as subprocesses by the per-user pipeline.
GATHER_SH = REPO_ROOT / ".agents" / "skills" / "daily-report" / "scripts" / "gather.sh"
EXTRACT_PY = SCRIPT_DIR / "extract_signals.py"
RUN_PY = SCRIPT_DIR / "run.py"

# Shared helper (see extract_signals.empty_payload). SCRIPT_DIR is put on
# sys.path first so the sibling module can be imported when this file is run
# as a script.
sys.path.insert(0, str(SCRIPT_DIR))
from extract_signals import empty_payload  # noqa: E402

# Workspace path used when a user entry has no "workspace_path".
DEFAULT_WORKSPACE = "~/Code/workspace"
# Lowest-priority users config source (see load_users_config).
LOCAL_CONFIG_PATH = SCRIPT_DIR / "users.local.json"


def log(msg: str) -> None:
    """Emit one progress line on stderr, prefixed with ``[run_all HH:MM:SS]``.

    The line is flushed immediately so it interleaves correctly with the
    output of child processes.
    """
    stamp = format(dt.datetime.now(), "%H:%M:%S")
    line = f"[run_all {stamp}] {msg}"
    print(line, file=sys.stderr, flush=True)


def encode_workspace_to_jsonl_dir(workspace_path: str) -> str:
    """Return ``workspace_path`` unchanged; no encoding is performed here.

    Per the original note, Claude Code names the sub-directory of
    ``~/.claude/projects`` after the session cwd with ``/`` replaced by ``-``
    (e.g. ``~/Code/workspace`` → ``-home-<user>-Code-workspace``). That name
    cannot be computed on the runner because ``~`` refers to the *remote*
    user's home directory.
    """
    # ~ is deliberately not expanded here: it belongs to the remote user.
    # The real encoding is done by the shell command assembled in
    # fetch_jsonl_for_user, so this function is only a pass-through.
    return workspace_path


def load_users_config(args) -> list[dict]:
    """Load the users config from the first available source.

    Sources, in priority order:
      1. ``args.users_file`` - path to a JSON file
      2. ``$DAILY_REPORT_USERS_CONFIG`` - JSON text in the environment
      3. ``LOCAL_CONFIG_PATH`` - local JSON file next to this script

    Args:
        args: parsed CLI namespace; only ``args.users_file`` is read.

    Returns:
        The parsed JSON array of user entries. Individual entries are not
        validated here.

    Raises:
        RuntimeError: no source is configured, the chosen file cannot be
            read, or the top-level JSON value is not an array.
        json.JSONDecodeError: the chosen source is not valid JSON.
    """
    try:
        if args.users_file:
            source = str(args.users_file)
            raw = Path(args.users_file).read_text(encoding="utf-8")
        else:
            env_raw = os.environ.get("DAILY_REPORT_USERS_CONFIG", "").strip()
            if env_raw:
                source = "$DAILY_REPORT_USERS_CONFIG"
                raw = env_raw
            elif LOCAL_CONFIG_PATH.exists():
                source = str(LOCAL_CONFIG_PATH)
                raw = LOCAL_CONFIG_PATH.read_text(encoding="utf-8")
            else:
                raise RuntimeError(
                    "No users config: provide --users-file, $DAILY_REPORT_USERS_CONFIG, "
                    f"or {LOCAL_CONFIG_PATH}"
                )
    except (OSError, UnicodeDecodeError) as err:
        # Surface unreadable files as the same "bad config" error type that a
        # missing config produces, instead of an unhandled traceback.
        raise RuntimeError(f"Cannot read users config {source}: {err}") from err

    users = json.loads(raw)
    if not isinstance(users, list):
        # Iterating a dict or string would yield keys/characters and be
        # misreported downstream as "malformed user entries".
        raise RuntimeError(
            f"Users config from {source} must be a JSON array, "
            f"got {type(users).__name__}"
        )
    return users


def ssh_args(ssh_host: str) -> list[str]:
    """Build the ssh argv prefix for ``ssh_host``.

    The sentinel host ``"@local"`` yields an empty list, meaning the command
    should be run directly on this machine. Any other value yields
    ``ssh`` with non-interactive options followed by the host.
    """
    if ssh_host != "@local":
        options = (
            "BatchMode=yes",
            "StrictHostKeyChecking=accept-new",
            "ConnectTimeout=15",
        )
        argv = ["ssh"]
        for opt in options:
            argv += ["-o", opt]
        argv.append(ssh_host)
        return argv
    return []


def _remote_fetch_script(ws: str, date_str: str) -> str:
    """Build the bash script that tars one day's jsonl files for workspace ``ws``.

    Raises ValueError if ``date_str`` is not an ISO date.
    """
    # The upper bound is computed here rather than with date arithmetic in the
    # remote shell, which may behave differently across hosts.
    next_date = (dt.date.fromisoformat(date_str) + dt.timedelta(days=1)).isoformat()
    return (
        # 1) Opt-out gate comes first: if the flag file exists on the user's
        #    machine, exit before any jsonl is read or streamed back.
        '[ -f "$HOME/.claude-insight-optout" ] && { echo OPTED_OUT >&2; exit 0; } ; '
        f"WS={shlex.quote(ws)}; "
        # 2) Expand a leading ~ explicitly (no eval). Inside the '~/'* arm the
        #    first two characters are known to be "~/", so ${WS#??} strips
        #    exactly that prefix without putting an unquoted ~ in the pattern
        #    (where the shell may tilde-expand it).
        "case \"$WS\" in "
        "  '~/'*) WS_ABS=\"$HOME/${WS#??}\" ;; "
        "  '~') WS_ABS=\"$HOME\" ;; "
        "  *) WS_ABS=\"$WS\" ;; "
        "esac; "
        # NOTE(review): only "/" is mapped to "-" here; confirm this matches
        # Claude Code's directory naming for paths with other punctuation.
        "ENC=$(printf '%s' \"$WS_ABS\" | sed 's|/|-|g'); "
        "PROJ=\"$HOME/.claude/projects/${ENC}\"; "
        "if [ ! -d \"$PROJ\" ]; then echo 'NO_PROJ_DIR' >&2; exit 0; fi; "
        f"DATE={shlex.quote(date_str)}; "
        f"NEXT_DATE={shlex.quote(next_date)}; "
        # 3) Files whose mtime falls in [DATE 00:00, NEXT_DATE 00:00).
        "find \"$PROJ\" -maxdepth 1 -name '*.jsonl' "
        "-newermt \"$DATE 00:00:00\" '!' -newermt \"$NEXT_DATE 00:00:00\" "
        # "-f -" forces the archive onto stdout instead of relying on tar's
        # build-time default archive device.
        "| tar -c -f - --files-from=- 2>/dev/null || true"
    )


def fetch_jsonl_for_user(user: dict, date_str: str, dest_dir: Path) -> int:
    """Pull ``user``'s jsonl session files for ``date_str`` into ``dest_dir``.

    The remote (or local, for ssh_host ``"@local"``) script locates
    ``~/.claude/projects/<encoded-workspace>``, selects the top-level
    ``*.jsonl`` files whose mtime falls on ``date_str``, and streams them back
    as a tar archive which is extracted under ``dest_dir``. Selection is by
    mtime only.

    Args:
        user: config entry; reads ``name``, ``ssh_host`` and the optional
            ``workspace_path``.
        date_str: ISO date (YYYY-MM-DD).
        dest_dir: local directory to extract into; created if missing.

    Returns:
        The number of ``*.jsonl`` files now under ``dest_dir``; ``0`` when the
        fetch or extraction failed or nothing matched; ``-1`` when the user
        has opted out.

    Raises:
        subprocess.TimeoutExpired: the fetch command exceeded 120 seconds.
        ValueError: ``date_str`` is not a valid ISO date.
    """
    ws = user.get("workspace_path") or DEFAULT_WORKSPACE
    script = _remote_fetch_script(ws, date_str)
    dest_dir.mkdir(parents=True, exist_ok=True)
    base = ssh_args(user["ssh_host"])
    if base:
        # ssh joins all trailing arguments with spaces and hands the result to
        # the remote login shell, which parses it again. Passing the script as
        # a separate argv element would therefore split it apart (bash -c
        # would only receive "[", and the opt-out gate would never fire).
        # Send a single, already-quoted command string instead.
        cmd = base + ["bash -c " + shlex.quote(script)]
    else:
        cmd = ["bash", "-c", script]
    # Log only user.name, never ssh_host, so user@ip stays out of the
    # Gitea Actions log.
    log(f"fetch jsonl: {user['name']}")
    proc = subprocess.run(cmd, capture_output=True, timeout=120)
    stderr_text = proc.stderr.decode(errors='replace')
    if "OPTED_OUT" in stderr_text:
        log(f"  {user['name']} opted out via ~/.claude-insight-optout; skipping (no jsonl read)")
        return -1
    if proc.returncode != 0:
        log(f"WARN fetch failed for {user['name']}: {stderr_text[:200]}")
        return 0
    if not proc.stdout:
        log(f"  no jsonl today for {user['name']}")
        return 0
    # Unpack the tar stream into dest_dir ("-f -" reads the archive from stdin).
    try:
        untar = subprocess.run(
            ["tar", "-x", "-f", "-", "-C", str(dest_dir)],
            input=proc.stdout,
            capture_output=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        log("WARN tar extract timeout")
        return 0
    if untar.returncode != 0:
        log(f"WARN tar extract failed: {untar.stderr.decode(errors='replace')[:200]}")
        return 0
    files = list(dest_dir.rglob("*.jsonl"))
    log(f"  pulled {len(files)} jsonl file(s), total {sum(f.stat().st_size for f in files)} bytes")
    return len(files)


def stage_fake_home(user: dict, jsonl_dir: Path, fake_home: Path) -> Path:
    """Copy fetched jsonl files into a fake home layout for extract_signals.

    Every ``*.jsonl`` found anywhere under ``jsonl_dir`` is copied (flattened,
    keeping its file name) into
    ``fake_home/.claude/projects/-remote-<user name>-Code-workspace/``.
    extract_signals is later pointed at ``fake_home`` through its ``--home``
    option.

    Args:
        user: config entry; only ``name`` is read.
        jsonl_dir: directory holding the extracted tar contents.
        fake_home: root of the fake home directory; created as needed.

    Returns:
        ``fake_home``, unchanged.
    """
    # The directory name is fixed and does not depend on the user's real
    # workspace path: per the original note, extract_signals globs every
    # sub-directory of .claude/projects, so the name only needs to stay
    # readable in the resulting signals.
    encoded = f"-remote-{user['name']}-Code-workspace"
    target = fake_home / ".claude" / "projects" / encoded
    target.mkdir(parents=True, exist_ok=True)
    for src in jsonl_dir.rglob("*.jsonl"):
        shutil.copy2(src, target / src.name)
    return fake_home


def run_gather_local(user: dict, date_str: str, out_path: Path) -> None:
    """Run gather.sh on the runner and write its stdout to ``out_path``.

    The script runs from REPO_ROOT with ``OVERRIDE_EMAIL`` / ``OVERRIDE_NAME``
    set from ``user`` and ``SKIP_CC_SESSIONS=1``. This step is best-effort: a
    non-zero exit is logged as a warning and ``out_path`` keeps whatever was
    written.

    Raises:
        subprocess.TimeoutExpired: gather.sh exceeded 120 seconds.
    """
    env = os.environ.copy()
    env["OVERRIDE_EMAIL"] = user["email"]
    env["OVERRIDE_NAME"] = user.get("name", "")
    env["SKIP_CC_SESSIONS"] = "1"
    with open(out_path, "w") as f:
        proc = subprocess.run(
            ["bash", str(GATHER_SH), date_str],
            env=env,
            stdout=f,
            stderr=subprocess.PIPE,
            timeout=120,
            cwd=str(REPO_ROOT),
            check=False,
        )
    if proc.returncode != 0:
        # Do not abort the pipeline, but leave a trace: otherwise a broken
        # gather.sh silently produces an empty or partial gather file.
        stderr_text = proc.stderr.decode(errors="replace")
        log(f"  WARN gather.sh exit {proc.returncode}: {stderr_text[:200]}")


def run_extract_signals_local(fake_home: Path, date_str: str, out_path: Path) -> None:
    """Run extract_signals.py on the runner and write its stdout to ``out_path``.

    The script is invoked with ``--date date_str --home fake_home``. This step
    is best-effort: a non-zero exit is logged as a warning and ``out_path``
    keeps whatever was written.

    Raises:
        subprocess.TimeoutExpired: the script exceeded 120 seconds.
    """
    with open(out_path, "w") as f:
        proc = subprocess.run(
            ["python3", str(EXTRACT_PY), "--date", date_str, "--home", str(fake_home)],
            stdout=f,
            stderr=subprocess.PIPE,
            timeout=120,
            check=False,
        )
    if proc.returncode != 0:
        # Surface the failure instead of discarding the captured stderr; the
        # caller still proceeds with whatever output was produced.
        stderr_text = proc.stderr.decode(errors="replace")
        log(f"  WARN extract_signals exit {proc.returncode}: {stderr_text[:200]}")


def generate_for_user(user: dict, date_str: str, dry_run: bool, no_gitea: bool) -> bool:
    """Run the whole per-user pipeline inside a throwaway temp directory.

    Steps: fetch jsonl, stage the fake home (only if files were fetched),
    run gather.sh, produce the signals file, then invoke run.py.

    Returns:
        True when run.py succeeded or the user opted out; False when the
        fetch timed out or run.py exited non-zero (fail-soft: the caller
        moves on to the next user).
    """
    log(f"--- {user['name']} ({user['email']}) ---")
    with tempfile.TemporaryDirectory(prefix=f"daily-{user['name']}-") as td:
        workdir = Path(td)
        jsonl_dir, fake_home = workdir / "jsonl", workdir / "fake-home"
        gather_file, signals_file = workdir / "gather.txt", workdir / "signals.json"

        # Step 1: fetch the session files.
        try:
            fetched = fetch_jsonl_for_user(user, date_str, jsonl_dir)
        except subprocess.TimeoutExpired:
            log("  ERROR fetch timeout; skip user")
            return False

        # An opt-out short-circuits everything that follows (no gather, no
        # run.py, nothing written to Gitea). It is a legitimate skip, so it is
        # reported as success rather than as a pipeline failure.
        if fetched == -1:
            return True

        have_sessions = fetched > 0

        # Step 2: stage the fake home. With zero files the pipeline still
        # continues, because gather.sh may report commits on its own.
        if have_sessions:
            stage_fake_home(user, jsonl_dir, fake_home)

        # Step 3: gather.sh.
        run_gather_local(user, date_str, gather_file)

        # Step 4: signals, either extracted or an empty placeholder payload.
        if not have_sessions:
            signals_file.write_text(json.dumps(empty_payload(date_str)))
        else:
            run_extract_signals_local(fake_home, date_str, signals_file)

        # Step 5: run.py.
        cmd = [
            "python3", str(RUN_PY),
            "--date", date_str,
            "--user", user["name"],
            "--gather-file", str(gather_file),
            "--signals-file", str(signals_file),
        ]
        optional_flags = (("--dry-run", dry_run), ("--no-gitea", no_gitea))
        cmd.extend(flag for flag, enabled in optional_flags if enabled)
        log("  invoke run.py")
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        if proc.returncode == 0:
            log(f"  done for {user['name']}")
            if dry_run and proc.stdout.strip():
                print(f"\n========== {user['name']} {date_str} ==========\n{proc.stdout}\n")
            return True
        log(f"  ERROR run.py exit {proc.returncode}: {proc.stderr[:400]}")
        return False


def main() -> int:
    """CLI entry point: dispatch the daily-report pipeline for every user.

    Returns:
        0 when at least one user succeeded (or the config lists no users),
        1 when every user failed,
        2 on a fatal setup error (bad ``--date``, unusable users config,
        unknown ``--only-user``).
    """
    p = argparse.ArgumentParser(description="Daily report central dispatcher")
    p.add_argument("--date", default=dt.date.today().isoformat(), help="YYYY-MM-DD")
    p.add_argument("--users-file", help="Path to users JSON (overrides env DAILY_REPORT_USERS_CONFIG)")
    p.add_argument("--only-user", help="Only run for this user name (debug)")
    p.add_argument("--dry-run", action="store_true", help="don't push to Gitea, print markdown to stdout")
    p.add_argument("--no-gitea", action="store_true", help="skip Gitea issue post (still push md to rolling branch)")
    args = p.parse_args()

    # Validate the date once up front; otherwise a typo would surface as the
    # same exception repeated for every user. Re-serialising normalises it to
    # YYYY-MM-DD for the shell commands built later.
    try:
        date_str = dt.date.fromisoformat(args.date).isoformat()
    except ValueError:
        log(f"FATAL: --date must be YYYY-MM-DD, got {args.date!r}")
        return 2

    try:
        users = load_users_config(args)
    except (RuntimeError, OSError, ValueError) as e:
        # OSError: unreadable config file. ValueError covers
        # json.JSONDecodeError and undecodable bytes.
        log(f"FATAL: {e}")
        return 2
    if not isinstance(users, list):
        log(f"FATAL: users config must be a JSON array, got {type(users).__name__}")
        return 2

    if args.only_user:
        users = [
            u for u in users
            if isinstance(u, dict) and u.get("name") == args.only_user
        ]
        if not users:
            log(f"FATAL: --only-user {args.only_user} not in config")
            return 2

    log(f"dispatching {len(users)} user(s) for {date_str}")
    ok = 0
    failed = 0
    for u in users:
        if not isinstance(u, dict):
            log(f"SKIP malformed user entry (expected object, got {type(u).__name__})")
            failed += 1
            continue
        missing = [k for k in ("name", "email", "ssh_host") if k not in u]
        if missing:
            # Log only the name, never the whole entry: ssh_host (user@ip)
            # must stay out of the Gitea Actions log.
            log(f"SKIP malformed user entry {u.get('name', '?')!r} (missing {missing})")
            failed += 1
            continue
        try:
            if generate_for_user(u, date_str, args.dry_run, args.no_gitea):
                ok += 1
            else:
                failed += 1
        except Exception as e:
            # Fail-soft boundary: one user's crash must not stop the others.
            log(f"  EXCEPTION for {u.get('name','?')}: {e}")
            failed += 1
    log(f"complete: ok={ok} failed={failed}")
    return 0 if ok > 0 or not users else 1


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
