#!/usr/bin/env bash
# Prove POST /admin/sync works without restarting the activity-core worker.
set -euo pipefail

# --- Operator-tunable settings: each reads an environment override and falls
# --- back to a default. Values the embedded Python reads are exported here.
export NAMESPACE="${ACTIVITY_CORE_NAMESPACE:-activity-core}"
export CLUSTER_HOST="${ACTIVITY_CORE_CLUSTER_HOST:-railiance01}"
export STATE_HUB_URL="${STATE_HUB_URL:-http://127.0.0.1:8000}"
export EVIDENCE_WORKSTREAM_ID="${STATE_HUB_EVIDENCE_WORKSTREAM_ID:-2c9e8e96-ec6a-433c-9e6d-0efbcd18679e}"
export EVIDENCE_TASK_ID="${STATE_HUB_EVIDENCE_TASK_ID:-60f3387d-3d14-42a9-b8a3-725a86468510}"

# Shell-only switches (not exported by this script).
: "${ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL:=0}"
: "${ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND:=}"
: "${ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE:=0}"

# --- Run state, filled in as the gates below execute.
# Assignment and export are kept separate so a failing `date` is not masked.
STARTED_AT="$(date -u +'%Y-%m-%dT%H:%M:%SZ')"
export STARTED_AT
CURRENT_GATE=startup
export BEFORE_JSON="" AFTER_JSON="" SYNC_RESPONSE_JSON=""
export FIXTURE_STATUS=skipped
EVIDENCE_NOTE_JSON=""

# Print a progress line to stdout, prefixed with the script tag.
# All arguments are joined into a single message.
log() {
  local message="$*"
  printf '[activity-core-admin-sync-smoke] %s\n' "$message"
}
# Emit $1 escaped with printf %q so it can be embedded as a single word in a
# command string that bash will re-parse. No trailing newline is written.
quote() {
  local escaped
  printf -v escaped '%q' "$1"
  printf '%s' "$escaped"
}
# Run the bash script text in $1 on the cluster executor, feeding it on stdin
# to `bash -s`. An empty CLUSTER_HOST runs it locally; otherwise it is run
# over ssh on that host. The exit status is that of the executed script/ssh.
cluster_bash() {
  local script="$1"
  if [[ -z "$CLUSTER_HOST" ]]; then
    bash -s <<<"$script"
  else
    ssh "$CLUSTER_HOST" "bash -s" <<<"$script"
  fi
}

#######################################
# Post a non-secret evidence note about this smoke run to State Hub.
# Globals:   exports EVIDENCE_STATUS and FAILING_GATE; the embedded Python
#            reads STATE_HUB_URL, CLUSTER_HOST, NAMESPACE, BEFORE_JSON,
#            AFTER_JSON, FIXTURE_STATUS, SYNC_RESPONSE_JSON, STARTED_AT,
#            EVIDENCE_WORKSTREAM_ID and EVIDENCE_TASK_ID from the environment.
# Arguments: $1 - run status (the summary treats "passed" specially)
#            $2 - name of the failing gate (optional)
# Outputs:   the HTTP response body of POST <STATE_HUB_URL>/progress/ on stdout
# Returns:   the exit status of the python3 process
#######################################
post_evidence() {
  local outcome="$1"
  local gate="${2:-}"
  export EVIDENCE_STATUS="$outcome"
  export FAILING_GATE="$gate"
  python3 - <<'PY'
import json
import os
import sys
from urllib import request

env = os.environ


def decode_env(var):
    """Return the JSON value held in env var `var`.

    Empty or unset yields None; text that is not valid JSON is wrapped
    as {"raw": text} so it still appears in the evidence.
    """
    text = env.get(var, "")
    if not text:
        return None
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return {"raw": text}
    return parsed


status = env["EVIDENCE_STATUS"]
failing_gate = env.get("FAILING_GATE") or None

detail = {
    "producer": "railiance-cluster",
    "verification": "activity-core no-restart admin sync smoke",
    "status": status,
    "failing_gate": failing_gate,
    "cluster_host": env.get("CLUSTER_HOST") or "local-kubectl",
    "namespace": env.get("NAMESPACE"),
    "worker_before": decode_env("BEFORE_JSON"),
    "worker_after": decode_env("AFTER_JSON"),
    "fixture_status": env.get("FIXTURE_STATUS"),
    "sync_response": decode_env("SYNC_RESPONSE_JSON"),
    "started_at": env.get("STARTED_AT"),
}

if status == "passed":
    summary = "Railiance activity-core no-restart admin-sync smoke passed: POST /admin/sync returned expected counters and worker pod identity/restart count stayed stable."
else:
    gate_suffix = f" at {failing_gate}" if failing_gate else ""
    summary = "Railiance activity-core no-restart admin-sync smoke failed" + gate_suffix + "; see non-secret evidence detail."

payload = {"summary": summary, "event_type": "note", "author": "railiance-cluster", "detail": detail}
# Link the note to a workstream/task only when the ids are non-empty.
for field, var in (("workstream_id", "EVIDENCE_WORKSTREAM_ID"), ("task_id", "EVIDENCE_TASK_ID")):
    linked_id = env.get(var)
    if linked_id:
        payload[field] = linked_id

endpoint = env["STATE_HUB_URL"].rstrip("/") + "/progress/"
body = json.dumps(payload).encode()
post = request.Request(endpoint, data=body, headers={"Content-Type": "application/json"}, method="POST")
with request.urlopen(post, timeout=20) as reply:
    sys.stdout.write(reply.read().decode())
PY
}

#######################################
# ERR trap handler: report the failing gate to State Hub, then exit with the
# status of the command that failed.
# Globals:   CURRENT_GATE (read)
# Returns:   does not return in the main shell (exits with the failing status)
#######################################
on_error() {
  local code=$?
  # Under errtrace the trap is also inherited by command substitutions and
  # subshells. Stay silent there: the main shell sees the failed substitution
  # and reports it exactly once.
  (( BASH_SUBSHELL == 0 )) || return 0
  trap - ERR
  # Evidence posting is best-effort; the original failure status must win.
  post_evidence failed "$CURRENT_GATE" >/dev/null || true
  exit "$code"
}
# Without errtrace the ERR trap is not inherited by shell functions, so under
# `set -e` a failure inside a directly called function (e.g. cluster_bash)
# would exit the script before on_error could post failure evidence.
set -o errtrace
trap on_error ERR

# Resolve the executor. The literal host "local" selects the local kubectl,
# but only when the operator opted in explicitly; an empty CLUSTER_HOST is the
# signal cluster_bash uses to run locally instead of over ssh.
if [[ "$CLUSTER_HOST" == local ]]; then
  if [[ "$ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL" != 1 ]]; then
    echo 'ACTIVITY_CORE_CLUSTER_HOST=local requires ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL=1' >&2
    exit 2
  fi
  CLUSTER_HOST=""
fi
export CLUSTER_HOST

# Gate: the executor must be reachable and provide kubectl and python3.
CURRENT_GATE='cluster executor preflight'
log "using cluster executor: ${CLUSTER_HOST:-local kubectl}"
cluster_bash 'set -euo pipefail; command -v kubectl >/dev/null; command -v python3 >/dev/null'

# Python program (run with `python3 -c` on the executor) that reads a
# `kubectl get pod -o json` list on stdin and prints a one-line, sorted-key
# JSON snapshot of one worker pod: name, uid, phase, and the restart count /
# image / image id of its first container status.
# Only the pod whose name sorts first is inspected.
# Exits non-zero with a plain message when there are no pods or when the
# chosen pod reports no container statuses (instead of a raw traceback).
# The program must not contain single quotes: it lives in a single-quoted string.
worker_snapshot_script='import json, sys

pods = json.load(sys.stdin).get("items", [])
if not pods:
    raise SystemExit("no actcore-worker pods found")
pod = min(pods, key=lambda item: item["metadata"]["name"])
status = pod.get("status") or {}
statuses = status.get("containerStatuses") or []
if not statuses:
    raise SystemExit("actcore-worker pod " + pod["metadata"]["name"] + " has no container statuses yet")
container = statuses[0]
print(json.dumps({"name":pod["metadata"]["name"],"uid":pod["metadata"]["uid"],"phase":status.get("phase"),"restart_count":container.get("restartCount",0),"image":container.get("image"),"image_id":container.get("imageID")}, sort_keys=True))'

# Gate: snapshot the worker pod before the sync so it can be compared later.
# Both the namespace and the Python program are %q-escaped because the whole
# command line is re-parsed by bash on the executor.
CURRENT_GATE='worker baseline capture'
baseline_snapshot_cmd="kubectl -n $(quote "$NAMESPACE") get pod -l app.kubernetes.io/name=actcore-worker -o json | python3 -c $(quote "$worker_snapshot_script")"
BEFORE_JSON="$(cluster_bash "$baseline_snapshot_cmd")"
export BEFORE_JSON

# Gate: optionally run an operator-supplied command on the executor before the
# sync. With no command, the run either proceeds with FIXTURE_STATUS=skipped or,
# when a fixture is required, aborts with exit status 2.
CURRENT_GATE='admin sync fixture'
if [[ -z "$ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND" ]]; then
  if [[ "$ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE" == 1 ]]; then
    echo 'ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE=1 but no fixture command was supplied' >&2
    exit 2
  fi
  FIXTURE_STATUS=skipped
else
  log 'running operator-supplied fixture command'
  cluster_bash "$ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND"
  FIXTURE_STATUS=ran
fi
export FIXTURE_STATUS

# Gate: trigger the sync from inside the API deployment and validate the reply.
# The outer <<EOF heredoc is unquoted, so $(quote "$NAMESPACE") is expanded
# here, before the script text is handed to cluster_bash. The inner <<'PY'
# heredoc is quoted, so the executor's bash passes the Python program verbatim
# to `python -` running in deploy/actcore-api via `kubectl exec -i`.
# The Python program POSTs to /admin/sync on localhost:8010 and exits non-zero
# (failing this gate) when a required section or key is missing or when
# payload["errors"] is truthy; otherwise it prints the payload as sorted-key
# JSON, which is captured in SYNC_RESPONSE_JSON.
# NOTE: keep the Python free of `$`, backticks and backslashes, since the outer
# heredoc would expand or interpret them.
CURRENT_GATE='POST /admin/sync'
log 'calling POST /admin/sync?definitions=true&schedules=true'
SYNC_RESPONSE_JSON="$(
  cluster_bash "$(cat <<EOF
set -euo pipefail
kubectl -n $(quote "$NAMESPACE") exec -i deploy/actcore-api -- python - <<'PY'
import json, urllib.request
req = urllib.request.Request('http://localhost:8010/admin/sync?definitions=true&schedules=true', method='POST')
with urllib.request.urlopen(req, timeout=60) as resp:
    payload = json.loads(resp.read().decode())
required = [('definitions','synced'),('schedules','upserted'),('schedules','paused'),('schedules','deleted_orphans'),('errors',None)]
for section, key in required:
    if section not in payload:
        raise SystemExit(f'missing sync response section {section!r}')
    if key is not None and key not in payload[section]:
        raise SystemExit(f'missing sync response key {section}.{key}')
if payload.get('errors'):
    raise SystemExit('admin sync returned errors: ' + json.dumps(payload['errors']))
print(json.dumps(payload, sort_keys=True))
PY
EOF
)"
)"
export SYNC_RESPONSE_JSON

# Gate: snapshot the worker pod again and require that neither its uid nor its
# restart count changed across the sync.
CURRENT_GATE='worker no-restart verification'
after_snapshot_cmd="kubectl -n $(quote "$NAMESPACE") get pod -l app.kubernetes.io/name=actcore-worker -o json | python3 -c $(quote "$worker_snapshot_script")"
AFTER_JSON="$(cluster_bash "$after_snapshot_cmd")"
export AFTER_JSON
python3 - <<'PY'
import json
import os

baseline = json.loads(os.environ['BEFORE_JSON'])
current = json.loads(os.environ['AFTER_JSON'])

# Checked in order: pod identity first, then the restart counter.
checks = (
    ("uid", "worker pod changed uid"),
    ("restart_count", "worker restart count changed"),
)
for field, complaint in checks:
    was, now = baseline[field], current[field]
    if was != now:
        raise SystemExit(f"{complaint}: {was} -> {now}")
PY

# Gate: record the successful run. If posting fails, the ERR trap is still
# armed and reports this gate as the failure.
CURRENT_GATE='State Hub evidence note'
log 'posting non-secret evidence note to State Hub'
EVIDENCE_NOTE_JSON="$(post_evidence passed "")"

# All gates passed: disarm the failure reporter, then print the State Hub
# response body as the last line of output.
trap - ERR
log 'verification passed'
printf '%s\n' "${EVIDENCE_NOTE_JSON}"
