#!/usr/bin/env bash
# Cluster-owned activity-core <-> llm-connect reconcile and non-secret evidence.
set -euo pipefail

# Directory two levels above this script's own directory.
# NOTE(review): ROOT is not referenced anywhere else in the visible script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# --- Cluster and State Hub targets (overridable from the environment) -------
# Kubernetes namespace holding the runtime ConfigMap, Secret and Deployment.
NAMESPACE="${ACTIVITY_CORE_NAMESPACE:-activity-core}"
# SSH host on which kubectl commands are executed. This deliberately uses `-`
# (not `:-`): leaving ACTIVITY_CORE_CLUSTER_HOST unset selects the default
# host, while exporting it as an EMPTY string selects the local-kubectl code
# path (every `[[ -n "$CLUSTER_HOST" ]]` check in this script). With `:-` an
# empty value was replaced by the default and local mode was unreachable.
CLUSTER_HOST="${ACTIVITY_CORE_CLUSTER_HOST-railiance01}"
# Base URL of State Hub; evidence is POSTed to "<STATE_HUB_URL>/progress/".
STATE_HUB_URL="${STATE_HUB_URL:-http://127.0.0.1:8000}"

# --- Desired llm-connect wiring ----------------------------------------------
# Values written into (and then verified against) the runtime ConfigMap.
EXPECTED_URL="${LLM_CONNECT_URL:-http://llm-connect.activity-core.svc.cluster.local:8080}"
EXPECTED_TIMEOUT="${LLM_CONNECT_TIMEOUT_SECONDS:-300}"
# Name of the provider Secret; only its existence and key count are inspected.
SECRET_NAME="${LLM_CONNECT_PROVIDER_SECRET_NAME:-llm-connect-provider-secrets}"
# Deployment whose rollout/readiness is checked.
DEPLOYMENT_NAME="${LLM_CONNECT_DEPLOYMENT_NAME:-llm-connect}"
# Local checkout that provides deploy/k8s/activity-core-llm-connect.
LLM_CONNECT_REPO="${LLM_CONNECT_REPO:-/home/worsch/llm-connect}"
# Checkout path on the cluster host; when empty it is derived later.
LLM_CONNECT_REMOTE_REPO="${LLM_CONNECT_REMOTE_REPO:-}"
# "1" => sync and `kubectl apply -k` the overlay before checking the deployment.
APPLY_LLM_CONNECT_OVERLAY="${APPLY_LLM_CONNECT_OVERLAY:-1}"
# "1" => a blocked gate makes the script exit 1 instead of 0.
REQUIRE_SMOKE="${REQUIRE_LLM_CONNECT_SMOKE:-0}"

# --- Optional State Hub linkage ----------------------------------------------
# When non-empty these are attached to the evidence payload.
EVIDENCE_WORKSTREAM_ID="${STATE_HUB_EVIDENCE_WORKSTREAM_ID:-}"
EVIDENCE_TASK_ID="${STATE_HUB_EVIDENCE_TASK_ID:-}"

# Merge-patch document for the runtime ConfigMap. It is produced by Python's
# json module so that arbitrary characters in the values are escaped correctly.
PATCH_JSON="$(
  EXPECTED_URL="$EXPECTED_URL" EXPECTED_TIMEOUT="$EXPECTED_TIMEOUT" python3 - <<'PY'
import json
from os import environ

runtime_settings = {
    "LLM_CONNECT_URL": environ["EXPECTED_URL"],
    "LLM_CONNECT_TIMEOUT_SECONDS": environ["EXPECTED_TIMEOUT"],
}
print(json.dumps({"data": runtime_settings}))
PY
)"

# Mutable gate results. They are exported because post_evidence reads them
# from the environment of its python3 child process.
export LIVE_URL="" LIVE_TIMEOUT=""
export SECRET_STATUS="unknown" SECRET_KEY_COUNT="0"
export DEPLOYMENT_STATUS="unknown"
export SMOKE_STATUS="skipped" SMOKE_SUMMARY=""
export EVIDENCE_STATUS="passed" FAILING_GATE=""

# Configuration values are exported for the same reason.
export NAMESPACE CLUSTER_HOST STATE_HUB_URL
export EXPECTED_URL EXPECTED_TIMEOUT
export SECRET_NAME DEPLOYMENT_NAME
export LLM_CONNECT_REPO LLM_CONNECT_REMOTE_REPO
export APPLY_LLM_CONNECT_OVERLAY REQUIRE_SMOKE
export EVIDENCE_WORKSTREAM_ID EVIDENCE_TASK_ID

# Print all arguments, joined into one message, behind the script's log tag.
# Outputs: one line on stdout.
log() {
  local message="$*"
  printf '%s %s\n' '[activity-core-llm-connect]' "$message"
}

# Emit $1 escaped with printf's %q so it can be spliced into a generated
# shell script as a single word.
# Arguments: $1 - raw value
# Outputs:   escaped value on stdout, without a trailing newline
quote() {
  local escaped
  printf -v escaped '%q' "$1"
  printf '%s' "$escaped"
}

# Run a script on the cluster executor by feeding it to `bash -s` on stdin.
# Globals:   CLUSTER_HOST (read) - SSH target; when empty the script runs locally
# Arguments: $1 - script text
# Returns:   exit status of the ssh / bash invocation
cluster_bash() {
  local remote_script="$1"
  if [[ -z "$CLUSTER_HOST" ]]; then
    bash -s <<<"$remote_script"
    return
  fi
  ssh "$CLUSTER_HOST" "bash -s" <<<"$remote_script"
}

#######################################
# Post one evidence note describing the current gate results to State Hub.
#
# All inputs are taken from the environment of the python3 child, so callers
# must export any variable they changed before calling this function.
#
# Globals (read, must be exported and set):
#   EVIDENCE_STATUS, NAMESPACE, EXPECTED_URL, EXPECTED_TIMEOUT,
#   SECRET_NAME, DEPLOYMENT_NAME, STATE_HUB_URL
# Globals (read, optional / may be empty):
#   FAILING_GATE, CLUSTER_HOST, LIVE_URL, LIVE_TIMEOUT, SECRET_STATUS,
#   SECRET_KEY_COUNT (parsed with int(); empty counts as 0), DEPLOYMENT_STATUS,
#   SMOKE_STATUS, SMOKE_SUMMARY, EVIDENCE_WORKSTREAM_ID, EVIDENCE_TASK_ID
# Outputs:
#   Writes the HTTP response body to stdout.
# Returns:
#   Exit status of python3. The embedded script has no exception handling, so
#   a failed request (or a non-numeric SECRET_KEY_COUNT) ends it with an
#   uncaught exception.
#######################################
post_evidence() {
  # The summary sentence depends on EVIDENCE_STATUS: "passed", "blocked", or
  # anything else (reported as failed). The JSON body is POSTed to
  # "<STATE_HUB_URL>/progress/" with a 20 second timeout; workstream_id and
  # task_id are only included when their variables are non-empty.
  python3 - <<'PY'
import json
import os
import sys
import urllib.request

status = os.environ["EVIDENCE_STATUS"]
detail = {
    "producer": "railiance-cluster",
    "verification": "activity-core llm-connect live reconcile",
    "status": status,
    "failing_gate": os.environ.get("FAILING_GATE") or None,
    "cluster_host": os.environ.get("CLUSTER_HOST") or "local-kubectl",
    "namespace": os.environ["NAMESPACE"],
    "expected_url": os.environ["EXPECTED_URL"],
    "expected_timeout_seconds": os.environ["EXPECTED_TIMEOUT"],
    "live_url": os.environ.get("LIVE_URL") or None,
    "live_timeout_seconds": os.environ.get("LIVE_TIMEOUT") or None,
    "provider_secret": {
        "name": os.environ["SECRET_NAME"],
        "status": os.environ.get("SECRET_STATUS"),
        "key_count": int(os.environ.get("SECRET_KEY_COUNT") or "0"),
    },
    "deployment": {
        "name": os.environ["DEPLOYMENT_NAME"],
        "status": os.environ.get("DEPLOYMENT_STATUS"),
    },
    "smoke": {
        "status": os.environ.get("SMOKE_STATUS"),
        "summary": os.environ.get("SMOKE_SUMMARY") or None,
    },
}

if status == "passed":
    summary = (
        "Railiance activity-core llm-connect reconcile passed: runtime config, "
        "provider Secret, deployment, and smoke gate are all healthy."
    )
elif status == "blocked":
    summary = (
        "Railiance activity-core llm-connect reconcile is blocked: "
        f"{os.environ.get('FAILING_GATE') or 'operator gate'}."
    )
else:
    summary = (
        "Railiance activity-core llm-connect reconcile failed"
        + (f" at {os.environ.get('FAILING_GATE')}" if os.environ.get("FAILING_GATE") else "")
        + "."
    )

payload = {
    "summary": summary,
    "event_type": "note",
    "author": "railiance-cluster",
    "detail": detail,
}
if os.environ.get("EVIDENCE_WORKSTREAM_ID"):
    payload["workstream_id"] = os.environ["EVIDENCE_WORKSTREAM_ID"]
if os.environ.get("EVIDENCE_TASK_ID"):
    payload["task_id"] = os.environ["EVIDENCE_TASK_ID"]

body = json.dumps(payload).encode("utf-8")
req = urllib.request.Request(
    os.environ["STATE_HUB_URL"].rstrip("/") + "/progress/",
    data=body,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req, timeout=20) as resp:
    sys.stdout.write(resp.read().decode("utf-8"))
PY
}

# Resolve where the llm-connect checkout lives on the executor:
#   1. an explicit LLM_CONNECT_REMOTE_REPO always wins;
#   2. without a cluster host, the local checkout path is used;
#   3. otherwise "llm-connect" under the directory `ssh <host> pwd` reports.
if [[ -n "$LLM_CONNECT_REMOTE_REPO" ]]; then
  : # keep the caller-supplied path untouched
elif [[ -z "$CLUSTER_HOST" ]]; then
  LLM_CONNECT_REMOTE_REPO="$LLM_CONNECT_REPO"
else
  LLM_CONNECT_REMOTE_REPO="$(ssh "$CLUSTER_HOST" pwd)/llm-connect"
fi
export LLM_CONNECT_REMOTE_REPO

log "using cluster executor: ${CLUSTER_HOST:-local kubectl}"
# Fail fast if the executor has no kubectl on its PATH.
cluster_bash 'set -euo pipefail; command -v kubectl >/dev/null'

log "reconciling non-secret activity-core runtime config"
# Merge-patch the ConfigMap; %q escapes the namespace and the JSON document so
# each arrives as a single word in the script run on the executor.
cluster_bash "$(
  printf 'set -euo pipefail\nkubectl -n %q patch configmap actcore-runtime-config --type merge -p %q' \
    "$NAMESPACE" "$PATCH_JSON"
)"

# Print the live value of one key of the actcore-runtime-config ConfigMap.
# Arguments: $1 - key under .data
read_live_config() {
  local key="$1"
  cluster_bash "set -euo pipefail
kubectl -n $(quote "$NAMESPACE") get cm actcore-runtime-config -o jsonpath='{.data.${key}}'"
}

# Read back what the cluster actually holds after the patch.
LIVE_URL="$(read_live_config LLM_CONNECT_URL)"
LIVE_TIMEOUT="$(read_live_config LLM_CONNECT_TIMEOUT_SECONDS)"
export LIVE_URL LIVE_TIMEOUT

# Gate: both live values must equal the expected ones, otherwise record a
# failure and stop.
if ! [[ "$LIVE_URL" == "$EXPECTED_URL" && "$LIVE_TIMEOUT" == "$EXPECTED_TIMEOUT" ]]; then
  export EVIDENCE_STATUS="failed" FAILING_GATE="runtime config mismatch"
  post_evidence
  exit 1
fi

# Inspect the provider Secret without ever reading its values: the executor
# prints either the literal word "missing" or the number of keys under .data.
# The template guards `len` with `if .data` because a Secret created without
# any data has no .data field at all, and `len` on that missing value makes
# kubectl fail. Unguarded, the script aborted here instead of reporting the
# Secret as empty.
SECRET_KEY_COUNT="$(
  cluster_bash "$(cat <<EOF
set -euo pipefail
if kubectl -n $(quote "$NAMESPACE") get secret $(quote "$SECRET_NAME") >/dev/null 2>&1; then
  kubectl -n $(quote "$NAMESPACE") get secret $(quote "$SECRET_NAME") -o go-template='{{ if .data }}{{ len .data }}{{ else }}0{{ end }}'
else
  printf missing
fi
EOF
)"
)"

# Classify the result; SECRET_KEY_COUNT is always left as a number so that
# post_evidence can parse it.
case "$SECRET_KEY_COUNT" in
  missing)
    SECRET_STATUS="missing"
    SECRET_KEY_COUNT="0"
    ;;
  "" | 0)
    SECRET_STATUS="empty"
    SECRET_KEY_COUNT="0"
    ;;
  *)
    SECRET_STATUS="present"
    ;;
esac
export SECRET_STATUS SECRET_KEY_COUNT

# Gate: nothing further can be verified until the provider Secret is populated.
# Record a "blocked" result; this only counts as a script failure when the
# smoke is mandatory (REQUIRE_SMOKE=1).
case "$SECRET_STATUS" in
  present) ;;
  *)
    export EVIDENCE_STATUS="blocked"
    export FAILING_GATE="provider Secret ${SECRET_NAME} ${SECRET_STATUS}"
    export DEPLOYMENT_STATUS="not checked; provider Secret gate not satisfied"
    export SMOKE_STATUS="blocked"
    export SMOKE_SUMMARY="provider Secret must be populated outside Git/State Hub before deployment and smoke"
    post_evidence
    if [[ "$REQUIRE_SMOKE" == "1" ]]; then
      exit 1
    fi
    exit 0
    ;;
esac

# Optionally sync the llm-connect kustomize overlay to the executor and apply
# it, then wait for the rollout. A failing apply/rollout is recorded as
# "failed" evidence before exiting, consistent with every other gate in this
# script. Previously `set -e` aborted here without any evidence, which is the
# most likely failure mode (a rollout timeout).
if [[ "$APPLY_LLM_CONNECT_OVERLAY" == "1" ]]; then
  if [[ -n "$CLUSTER_HOST" ]]; then
    log "syncing llm-connect overlay to ${CLUSTER_HOST}:${LLM_CONNECT_REMOTE_REPO}/deploy/k8s/activity-core-llm-connect"
    ssh "$CLUSTER_HOST" "mkdir -p $(quote "$LLM_CONNECT_REMOTE_REPO")/deploy/k8s/activity-core-llm-connect"
    # --delete keeps the remote overlay an exact mirror of the local one.
    rsync -a --delete \
      "$LLM_CONNECT_REPO/deploy/k8s/activity-core-llm-connect/" \
      "${CLUSTER_HOST}:${LLM_CONNECT_REMOTE_REPO}/deploy/k8s/activity-core-llm-connect/"
  fi
  log "applying llm-connect overlay"
  overlay_rc=0
  cluster_bash "$(cat <<EOF
set -euo pipefail
kubectl apply -k $(quote "$LLM_CONNECT_REMOTE_REPO")/deploy/k8s/activity-core-llm-connect
kubectl -n $(quote "$NAMESPACE") rollout status deploy/$(quote "$DEPLOYMENT_NAME") --timeout=180s
EOF
)" || overlay_rc=$?
  if [[ "$overlay_rc" != "0" ]]; then
    EVIDENCE_STATUS="failed"
    FAILING_GATE="llm-connect overlay apply or rollout failed (exit ${overlay_rc})"
    DEPLOYMENT_STATUS="not verified; overlay apply or rollout failed"
    SMOKE_STATUS="blocked"
    SMOKE_SUMMARY="overlay must apply and roll out before smoke"
    export EVIDENCE_STATUS FAILING_GATE DEPLOYMENT_STATUS SMOKE_STATUS SMOKE_SUMMARY
    # Keep the original failure code even if State Hub cannot be reached.
    post_evidence || log "warning: could not post failure evidence to State Hub"
    exit "$overlay_rc"
  fi
fi

# Ask the executor for the deployment's "<readyReplicas>/<replicas>" pair, or
# the literal word "missing" when the deployment cannot be fetched.
DEPLOYMENT_STATUS="$(
  cluster_bash "set -euo pipefail
if kubectl -n $(quote "$NAMESPACE") get deploy $(quote "$DEPLOYMENT_NAME") >/dev/null 2>&1; then
  kubectl -n $(quote "$NAMESPACE") get deploy $(quote "$DEPLOYMENT_NAME") -o jsonpath='{.status.readyReplicas}/{.status.replicas}'
else
  printf missing
fi"
)"
export DEPLOYMENT_STATUS

# Gate: the deployment must be fully ready before the smoke runs.
# DEPLOYMENT_STATUS is "<ready>/<total>" or the word "missing". It counts as
# ready when <ready> is a positive integer equal to <total>, so deployments
# scaled beyond one replica pass as well. The previous check accepted only the
# literal "1/1". "missing", "/1" (no ready replicas) and "0/0" all fail it.
ready_replicas="${DEPLOYMENT_STATUS%%/*}"
total_replicas="${DEPLOYMENT_STATUS#*/}"
deployment_ready=0
if [[ "$ready_replicas" =~ ^[1-9][0-9]*$ && "$ready_replicas" == "$total_replicas" ]]; then
  deployment_ready=1
fi

if [[ "$deployment_ready" != "1" ]]; then
  EVIDENCE_STATUS="blocked"
  FAILING_GATE="llm-connect deployment not ready (${DEPLOYMENT_STATUS})"
  SMOKE_STATUS="blocked"
  SMOKE_SUMMARY="deployment must be ready before smoke"
  export EVIDENCE_STATUS FAILING_GATE SMOKE_STATUS SMOKE_SUMMARY
  post_evidence
  # A blocked gate only fails the script when the smoke is mandatory.
  [[ "$REQUIRE_SMOKE" == "1" ]] && exit 1
  exit 0
fi

log "running in-namespace llm-connect fixture smoke"
# Run a throwaway pod that retries the smoke script up to six times, five
# seconds apart. The `||` captures a non-zero status without tripping `set -e`,
# so the outcome can still be reported below.
SMOKE_CODE=0
SMOKE_OUTPUT="$(
  cluster_bash "$(cat <<EOF
set -euo pipefail
kubectl -n $(quote "$NAMESPACE") run llm-connect-smoke-\$(date +%s) \\
  --rm -i --restart=Never \\
  --image=llm-connect:latest \\
  --image-pull-policy=Never \\
  --env=LLM_CONNECT_URL=$(quote "$EXPECTED_URL") \\
  --env=LLM_CONNECT_TIMEOUT_SECONDS=$(quote "$EXPECTED_TIMEOUT") \\
  --command -- sh -c 'for i in 1 2 3 4 5 6; do sleep 5; python scripts/smoke_activity_core_endpoint.py && exit 0; echo "smoke attempt \$i failed; retrying"; done; exit 1'
EOF
)" 2>&1
)" || SMOKE_CODE=$?

if [[ "$SMOKE_CODE" != "0" ]]; then
  SMOKE_STATUS="failed"
  # Only the last five lines of output go into the failure evidence.
  SMOKE_SUMMARY="$(tail -n 5 <<<"$SMOKE_OUTPUT")"
  EVIDENCE_STATUS="failed"
  FAILING_GATE="llm-connect fixture smoke failed"
else
  SMOKE_STATUS="passed"
  SMOKE_SUMMARY="$SMOKE_OUTPUT"
  EVIDENCE_STATUS="passed"
  FAILING_GATE=""
fi
export SMOKE_STATUS SMOKE_SUMMARY EVIDENCE_STATUS FAILING_GATE
post_evidence
# The script's exit status mirrors the smoke result.
exit "$SMOKE_CODE"

