railiance-infra/tools/forgejo-restore-drill.sh
tegwick 092315895f RAIL-HO-WP-0005-T09: Forgejo backup/restore drill assets and evidence
Add isolated-namespace restore drill (CNPG cluster, PVC, orchestration script)
and document successful 2026-07-04 run: production forgejo dump restored with
health 200 and pilot repos visible via API. Scheduled backups remain open.
2026-07-04 11:26:50 +02:00

115 lines
No EOL
5 KiB
Bash
Executable file

#!/usr/bin/env bash
# Forgejo backup/restore drill against a non-production namespace
# (RAIL-HO-WP-0005-T09).
# To start over, re-run with DRILL_CLEAN=1, which wipes the drill namespace
# first:
#   DRILL_CLEAN=1 ./tools/forgejo-restore-drill.sh
set -euo pipefail

# Cluster access: honour a caller-supplied KUBECONFIG, otherwise fall back to
# the hosteurope kubeconfig under $HOME.
: "${KUBECONFIG:=${HOME}/.kube/config-hosteurope}"
export KUBECONFIG

# Namespace that every drill resource is created in.
NS=forgejo-restore-drill

# Caller-tunable settings and their defaults.
: "${DRILL_CLEAN:=0}"
: "${BACKUP_LOCAL:=/tmp/forgejo-drill/forgejo-drill-backup.zip}"

# Absolute path of the directory holding this script, and of its parent.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "${SCRIPT_DIR}" && pwd)"
ROOT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Pod to dump from: PROD_POD when the caller provides a non-empty value,
# otherwise the first pod in namespace "forgejo" that carries the
# app.kubernetes.io/instance=forgejo label.
if [[ -z "${PROD_POD:-}" ]]; then
  PROD_POD="$(kubectl get pods -n forgejo -l app.kubernetes.io/instance=forgejo -o jsonpath='{.items[0].metadata.name}')"
fi
# Print a progress banner of the form "==> <message>" on stdout.
# Arguments: the words of the message (joined with single spaces).
step() {
  printf '==> %s\n' "$*"
}
# Optional reset: with DRILL_CLEAN=1, delete whatever a previous drill left
# behind before the namespace is (re)built.
if [[ "${DRILL_CLEAN}" == "1" ]]; then
  step "Clean prior drill namespace ${NS}"
  # --ignore-not-found keeps a first-ever run green without hiding real
  # failures such as a deletion that does not finish within the timeout;
  # continuing into a half-deleted namespace would only fail later and less
  # clearly.
  kubectl delete namespace "${NS}" --ignore-not-found --wait=true --timeout=5m
fi

step "Create namespace ${NS}"
# Rendering the manifest client-side and piping it into `apply` makes this
# step succeed whether or not the namespace already exists.
kubectl create namespace "${NS}" --dry-run=client -o yaml | kubectl apply -f -

step "Copy forgejo-db-credentials into ${NS}"
# Re-home the Secret from namespace "databases": keep only name, labels and
# annotations from its metadata (dropping uid, resourceVersion and the like)
# and point it at the drill namespace. The namespace is handed to Python as
# an argument rather than being spliced into the Python source text.
kubectl get secret forgejo-db-credentials -n databases -o json \
  | python3 -c '
import json
import sys

secret = json.load(sys.stdin)
kept = ("name", "labels", "annotations")
secret["metadata"] = {k: v for k, v in secret["metadata"].items() if k in kept}
secret["metadata"]["namespace"] = sys.argv[1]
print(json.dumps(secret))
' "${NS}" \
  | kubectl apply -f -
step "Deploy restore CNPG cluster"
kubectl apply -f "${ROOT_DIR}/infra/forgejo-restore-drill/forgejo-db-restore-cluster.yaml"
# Block until the cluster resource reports Ready (at most 10 minutes).
kubectl wait --for=condition=Ready cluster/forgejo-db-restore -n "${NS}" --timeout=10m

step "Ensure local backup exists"
# An existing file at BACKUP_LOCAL is reused as-is; only when it is missing
# is a fresh dump taken from the pod named by PROD_POD.
if [[ ! -f "${BACKUP_LOCAL}" ]]; then
  remote_dump=/tmp/forgejo-drill-backup.zip
  mkdir -p "$(dirname "${BACKUP_LOCAL}")"

  # Dump inside the pod, then download to a ".partial" name so that an
  # interrupted copy can never be mistaken for a complete backup by the
  # existence check above on a later run.
  fetch_rc=0
  {
    kubectl exec -n forgejo "${PROD_POD}" -c gitea -- forgejo dump -f "${remote_dump}" \
      && kubectl cp "forgejo/${PROD_POD}:${remote_dump}" "${BACKUP_LOCAL}.partial" -c gitea
  } || fetch_rc=$?

  # Whatever happened above, do not leave the dump behind in the source pod.
  # A failed cleanup is reported but does not mask the dump/copy result.
  kubectl exec -n forgejo "${PROD_POD}" -c gitea -- rm -f "${remote_dump}" \
    || echo "warning: could not remove ${remote_dump} from pod ${PROD_POD}" >&2

  if (( fetch_rc != 0 )); then
    rm -f -- "${BACKUP_LOCAL}.partial"
    echo "error: dumping or copying the backup failed (exit ${fetch_rc})" >&2
    exit "${fetch_rc}"
  fi
  mv -- "${BACKUP_LOCAL}.partial" "${BACKUP_LOCAL}"
fi
ls -lh "${BACKUP_LOCAL}"

step "Apply restore PVC"
# NOTE(review): the step label says PVC while the manifest is named
# restore-job.yaml; presumably it defines the forgejo-restore-data claim that
# later steps mount - confirm against the manifest.
kubectl apply -f "${ROOT_DIR}/infra/forgejo-restore-drill/restore-job.yaml"
step "Run restore pod (stage backup, import files + SQL)"
# Start from a clean slate: remove a helper pod left over from an earlier run.
kubectl delete pod forgejo-restore-import -n "${NS}" --ignore-not-found --wait=true

# Helper pod that only sleeps; the import itself is driven through
# `kubectl exec` below. It mounts the restore PVC at /data and a scratch
# emptyDir at /backup for the uploaded archive.
# The database password comes from the forgejo-db-credentials Secret in this
# namespace via secretKeyRef, so it never appears on a command line. PGHOST is
# derived from NS so the service name follows the namespace.
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: forgejo-restore-import
  namespace: ${NS}
spec:
  restartPolicy: Never
  containers:
    - name: restore
      image: code.forgejo.org/forgejo/forgejo:11.0.3
      command: ["sleep", "3600"]
      env:
        - name: PGPASSWORD
          valueFrom:
            secretKeyRef:
              name: forgejo-db-credentials
              key: password
        - name: PGHOST
          value: forgejo-db-restore-rw.${NS}.svc.cluster.local
      volumeMounts:
        - name: data
          mountPath: /data
        - name: backup
          mountPath: /backup
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: forgejo-restore-data
    - name: backup
      emptyDir: {}
EOF
kubectl wait --for=condition=Ready pod/forgejo-restore-import -n "${NS}" --timeout=3m

# Upload the archive into the pod scratch volume.
kubectl cp "${BACKUP_LOCAL}" "${NS}/forgejo-restore-import:/backup/forgejo-drill-backup.zip" -c restore

# Import inside the pod. The script is single-quoted, so every variable in it
# is expanded by the shell in the container, not by this script.
kubectl exec -n "${NS}" forgejo-restore-import -c restore -- sh -c '
  set -eu
  # Tools needed for the import, installed with apk.
  apk add --no-cache unzip postgresql-client >/dev/null
  # Empty the data volume, then lay out the repository directory.
  rm -rf /data/*
  mkdir -p /data/git/gitea-repositories
  unzip -q /backup/forgejo-drill-backup.zip -d /tmp/dump
  # Repositories and the data directory from the dump go onto the volume.
  cp -a /tmp/dump/repos/. /data/git/gitea-repositories/
  cp -a /tmp/dump/data/. /data/
  chown -R git:git /data
  # Load the SQL dump; ON_ERROR_STOP makes psql fail on the first bad
  # statement. The password is read by psql from PGPASSWORD.
  psql -h "${PGHOST}" -U forgejo -d forgejo -v ON_ERROR_STOP=1 -f /tmp/dump/forgejo-db.sql
  echo restore-import-ok
'

# The helper pod must be gone before the release below mounts the same claim.
kubectl delete pod forgejo-restore-import -n "${NS}" --wait=true
step "Deploy isolated Forgejo release"
# The values files are resolved relative to the railiance-apps checkout;
# override APPS_DIR when it lives somewhere other than under $HOME.
APPS_DIR="${APPS_DIR:-${HOME}/railiance-apps}"
cd "${APPS_DIR}"

# Read the database password from the Secret copied into the drill namespace.
DB_PASS="$(kubectl get secret forgejo-db-credentials -n "${NS}" -o jsonpath='{.data.password}' | base64 -d)"
if [[ -z "${DB_PASS}" ]]; then
  echo "error: secret forgejo-db-credentials in ${NS} has no usable 'password' key" >&2
  exit 1
fi

# Wrap the password in a JSON document (JSON is valid YAML) and give it to
# helm as a values file on stdin. Compared with --set this keeps the secret
# off every process command line and survives commas, backslashes and other
# characters that --set would interpret. printf is a shell builtin, so the
# value is not exposed as a process argument there either.
DB_VALUES="$(printf '%s' "${DB_PASS}" | python3 -c '
import json
import sys

print(json.dumps({"gitea": {"config": {"database": {"PASSWD": sys.stdin.read()}}}}))
')"
unset DB_PASS

# "-f -" is listed after the repository values files so that it overrides
# any PASSWD they may contain.
helm upgrade --install forgejo-restore gitea-charts/gitea --version 12.5.0 \
  --namespace "${NS}" --create-namespace \
  -f helm/forgejo-values.yaml \
  -f helm/forgejo-registry-values.yaml \
  -f - \
  --set strategy.type=Recreate \
  --set persistence.existingClaim=forgejo-restore-data \
  --set "gitea.config.database.HOST=forgejo-db-restore-rw.${NS}.svc.cluster.local:5432" \
  --set gitea.config.server.DOMAIN=forgejo-restore.local \
  --set gitea.config.server.ROOT_URL=http://forgejo-restore.local:3000/ \
  --set gitea.admin.password=restore-drill-local-only \
  --set ingress.enabled=false \
  --wait --timeout=10m <<<"${DB_VALUES}"
unset DB_VALUES
step "Post-restore checks via port-forward"
pf_base_url=http://127.0.0.1:13000
kubectl port-forward -n "${NS}" svc/forgejo-restore-gitea-http 13000:3000 >/tmp/forgejo-restore-pf.log 2>&1 &
PF_PID=$!
# Stop the background port-forward on every exit path. Without this, a failing
# check below would abort the script (set -e) and leave it running.
trap 'kill "${PF_PID}" 2>/dev/null || true' EXIT

# Wait for the tunnel to answer instead of sleeping a fixed time: poll once a
# second for up to 30 seconds, and give up early if port-forward has died.
pf_ready=0
for _ in {1..30}; do
  if ! kill -0 "${PF_PID}" 2>/dev/null; then
    echo "error: port-forward exited early; see /tmp/forgejo-restore-pf.log" >&2
    exit 1
  fi
  if curl -fsS -o /dev/null "${pf_base_url}/" 2>/dev/null; then
    pf_ready=1
    break
  fi
  sleep 1
done
if [[ "${pf_ready}" != "1" ]]; then
  echo "error: ${pf_base_url}/ did not become reachable; see /tmp/forgejo-restore-pf.log" >&2
  exit 1
fi

# Front page must answer with a success status (-f fails on HTTP >= 400).
curl -fsS -o /dev/null -w 'health:%{http_code}\n' "${pf_base_url}/"

# Each of these repositories must be retrievable through the API; print its
# full name and default branch as evidence.
for repo in coulomb/glas-harness coulomb/key-cape; do
  curl -fsS "${pf_base_url}/api/v1/repos/${repo}" \
    | python3 -c "import json,sys; d=json.load(sys.stdin); print('repo', d.get('full_name'), d.get('default_branch'))"
done

kill "${PF_PID}" 2>/dev/null || true
echo "restore-drill-complete"