From 32a7cf5b25308b06e9b0d03f5f19244d99c33fa5 Mon Sep 17 00:00:00 2001 From: "soroush.asadi" Date: Mon, 15 Jun 2026 18:45:07 +0330 Subject: [PATCH] ops: nightly DB backup + self-hosted uptime monitoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backup (production data-loss protection — was none): - meezi-backup sidecar in docker-compose.yml runs pg_dump nightly at 02:00 Tehran, gzip, 14-day rotation, atomic .partial→final, into ./backups (persists across deploys; rsync off-box per RESTORE.md). - Wired into the deploy job (up -d --no-deps backup); takes one dump on boot. - scripts/backup/pg-backup-loop.sh + RESTORE.md (restore + off-box guidance). Monitoring: - docker-compose.monitoring.yml: Uptime Kuma stack (own volume), stood up once, independent of app deploys. - Caddyfile status.{$DOMAIN} route; docs/monitoring.md lists the exact monitors (incl. /q guest-menu 200 check) + TLS-expiry alerts (catches the ~90-day cert breakage early) + alert-channel setup. Co-Authored-By: Claude Opus 4.8 --- .gitea/workflows/ci-cd.yml | 5 +++ Caddyfile | 8 ++++ docker-compose.monitoring.yml | 29 +++++++++++++++ docker-compose.yml | 24 ++++++++++++ docs/monitoring.md | 47 ++++++++++++++++++++++++ scripts/backup/RESTORE.md | 55 ++++++++++++++++++++++++++++ scripts/backup/pg-backup-loop.sh | 63 ++++++++++++++++++++++++++++++++ 7 files changed, 231 insertions(+) create mode 100644 docker-compose.monitoring.yml create mode 100644 docs/monitoring.md create mode 100644 scripts/backup/RESTORE.md create mode 100644 scripts/backup/pg-backup-loop.sh diff --git a/.gitea/workflows/ci-cd.yml b/.gitea/workflows/ci-cd.yml index 99d2fd6..2e1f63e 100644 --- a/.gitea/workflows/ci-cd.yml +++ b/.gitea/workflows/ci-cd.yml @@ -446,6 +446,11 @@ jobs: -f docker-compose.admin.yml \ up -d --no-deps admin-web + - name: Start nightly DB backup + # Sidecar that pg_dumps meezi-db nightly into ./backups (14-day retention). 
+ # --no-deps so it doesn't try to (re)start postgres which isn't compose-managed. + run: docker compose up -d --no-deps backup + - name: Show all running containers if: always() run: docker compose -f docker-compose.yml -f docker-compose.admin.yml ps diff --git a/Caddyfile b/Caddyfile index 32f171c..da3ced3 100644 --- a/Caddyfile +++ b/Caddyfile @@ -7,6 +7,7 @@ # Domains needed in DNS (all → same server IP): # meezi.ir, app.meezi.ir, api.meezi.ir, # koja.meezi.ir, admin.meezi.ir, admin-api.meezi.ir +# status.meezi.ir (only if the monitoring stack is running — see docs/monitoring.md) { email {$ACME_EMAIL} @@ -41,3 +42,10 @@ admin.{$DOMAIN} { admin-api.{$DOMAIN} { reverse_proxy admin-api:8080 } + +# ── Uptime monitoring (Uptime Kuma) ────────────────────────────────────────── +# Only resolves if the monitoring stack is up (docker-compose.monitoring.yml). +# Caddy ignores upstreams that don't exist until the container is running. +status.{$DOMAIN} { + reverse_proxy uptime-kuma:3001 +} diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml new file mode 100644 index 0000000..5db2781 --- /dev/null +++ b/docker-compose.monitoring.yml @@ -0,0 +1,29 @@ +name: meezi + +# Self-hosted uptime monitoring for Meezi — Uptime Kuma. +# +# One-time stand-up (does NOT need redeploying with every app deploy): +# docker compose -f docker-compose.monitoring.yml up -d +# +# Then open https://status.meezi.ir (or http://SERVER:3201) and configure the +# monitors + alert channel as described in docs/monitoring.md. +# +# Config + history persist in the uptime_kuma_data volume. 
+ +services: + uptime-kuma: + image: ${UPTIME_KUMA_IMAGE:-mirror.soroushasadi.com/louislam/uptime-kuma:1} + container_name: meezi-uptime-kuma + restart: unless-stopped + volumes: + - uptime_kuma_data:/app/data + ports: + - "${UPTIME_KUMA_PORT:-3201}:3001" + healthcheck: + test: ["CMD-SHELL", "node extra/healthcheck.js || exit 1"] + interval: 60s + timeout: 10s + retries: 3 + +volumes: + uptime_kuma_data: diff --git a/docker-compose.yml b/docker-compose.yml index f4c9e55..0b3de8d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -177,6 +177,30 @@ services: ports: - "${KOJA_PORT:-3103}:3000" + # Nightly Postgres backup — dumps the DB every night, keeps the last 14 days. + # Dumps land in the host ./backups dir (bind mount) so they survive a full + # container/volume wipe and can be rsync'd off-box. See scripts/backup/RESTORE.md. + backup: + image: ${POSTGRES_IMAGE:-mirror.soroushasadi.com/postgres:16-alpine} + container_name: meezi-backup + restart: unless-stopped + depends_on: + postgres: + condition: service_healthy + environment: + PGHOST: postgres + PGPORT: "5432" + PGUSER: meezi + PGPASSWORD: "${DB_PASSWORD:-meezi_local_pass}" + PGDATABASE: meezi + RETAIN_DAYS: "${BACKUP_RETAIN_DAYS:-14}" + BACKUP_HOUR: "${BACKUP_HOUR:-2}" + TZ: Asia/Tehran + entrypoint: ["/bin/sh", "/backup/pg-backup-loop.sh"] + volumes: + - ./scripts/backup:/backup:ro + - ${BACKUP_DIR:-./backups}:/backups + volumes: postgres_data: redis_data: diff --git a/docs/monitoring.md b/docs/monitoring.md new file mode 100644 index 0000000..757c96f --- /dev/null +++ b/docs/monitoring.md @@ -0,0 +1,47 @@ +# Meezi uptime monitoring (Uptime Kuma) + +Self-hosted uptime + TLS-expiry monitoring with alerting. Runs as a separate +compose stack so it stays up independently of app deploys. 
+ +## Stand it up (one time, on the prod host) +```bash +cd /path/to/meezi +docker compose -f docker-compose.monitoring.yml up -d +``` +Then either: +- add a DNS A record `status.meezi.ir → server IP` and reload Caddy + (`docker exec meezi-caddy caddy reload` or restart the caddy stack) — the + `status.{$DOMAIN}` block is already in the Caddyfile, **or** +- reach it directly at `http://SERVER:3201` for the initial setup. + +First visit creates the admin account — set a strong password. + +## Monitors to add (in the Uptime Kuma UI) +Add one **HTTP(s)** monitor per public surface, interval 60s, accept 2xx/3xx: + +| Name | URL | Notes | +|------|-----|-------| +| Website | https://meezi.ir/fa | marketing | +| Dashboard | https://app.meezi.ir/fa/login | merchant panel | +| API health | https://api.meezi.ir/api/public/security-config | returns JSON 200 | +| Koja | https://koja.meezi.ir/fa | public discovery | +| Admin | https://admin.meezi.ir | internal panel | +| Guest menu | https://app.meezi.ir/q/healthcheck | should be 200 (not 500) | + +For each HTTPS monitor enable **"Certificate Expiry Notification"** — this +catches the recurring ~90-day Let's Encrypt cert-chain breakages early +(see the mirror-cert runbook). Set the threshold to 14 days. + +## Alerts +Settings → Notifications → add a channel (Telegram bot or email/SMTP), then +attach it to every monitor. Telegram is simplest: create a bot via @BotFather, +get the chat id, paste both into Uptime Kuma. + +## What this does NOT replace +- **Backups** — see `scripts/backup/RESTORE.md`. +- **Crash auto-recovery** — Docker `restart: unless-stopped` already restarts + crashed containers; Uptime Kuma tells you when one is flapping or down. + +## Status page (optional) +Uptime Kuma can publish a public status page (Settings → Status Pages) at +`status.meezi.ir/status/meezi` if you want customers to see uptime. 
diff --git a/scripts/backup/RESTORE.md b/scripts/backup/RESTORE.md new file mode 100644 index 0000000..00d23c9 --- /dev/null +++ b/scripts/backup/RESTORE.md @@ -0,0 +1,55 @@ +# Meezi database backup & restore + +## How backups work +The `meezi-backup` container (in `docker-compose.yml`) runs a nightly `pg_dump` +of the whole `meezi` database at **02:00 Asia/Tehran**, gzips it, and keeps the +**last 14 days** in the host `./backups` directory (override with `BACKUP_DIR`). +Filenames: `meezi_YYYYMMDD_HHMMSS.sql.gz`. One backup is also taken immediately +when the container first starts. + +Check it's running / list backups: +```bash +docker logs meezi-backup --tail 20 +ls -lh ./backups +``` + +## ⚠️ Copy backups OFF the server +The bind-mounted `./backups` survives a container/volume wipe, but **not a disk +failure**. Add an off-box copy (run from the host via cron), e.g.: +```bash +# rsync to another host nightly at 03:00 +0 3 * * * rsync -az --delete /path/to/meezi/backups/ user@backup-host:/srv/meezi-backups/ +``` +or `rclone copy ./backups remote:meezi-backups` to object storage. + +## Restore +1. Pick a dump: + ```bash + ls -lh ./backups # choose e.g. meezi_20260615_020000.sql.gz + ``` +2. (Recommended) stop the API so nothing writes mid-restore: + ```bash + docker stop meezi-api + ``` +3. Restore into the running Postgres container: + ```bash + gunzip -c ./backups/meezi_20260615_020000.sql.gz \ + | docker exec -i meezi-db psql -U meezi -d meezi + ``` + For a clean restore into an empty DB, drop & recreate first: + ```bash + docker exec -i meezi-db psql -U meezi -d postgres -c "DROP DATABASE meezi;" + docker exec -i meezi-db psql -U meezi -d postgres -c "CREATE DATABASE meezi OWNER meezi;" + gunzip -c ./backups/meezi_YYYYMMDD_HHMMSS.sql.gz | docker exec -i meezi-db psql -U meezi -d meezi + ``` +4. 
Start the API again (it runs EF migrations on boot, which is a no-op if the
+   dump is current):
+   ```bash
+   docker start meezi-api
+   ```
+
+## Manual one-off backup
+```bash
+docker exec meezi-db pg_dump -U meezi --no-owner --no-privileges meezi \
+  | gzip -9 > ./backups/meezi_manual_$(date +%Y%m%d_%H%M%S).sql.gz
+```
diff --git a/scripts/backup/pg-backup-loop.sh b/scripts/backup/pg-backup-loop.sh
new file mode 100644
index 0000000..b07de77
--- /dev/null
+++ b/scripts/backup/pg-backup-loop.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+# Nightly Postgres backup loop for Meezi.
+#
+# Runs inside a small postgres-image container (has pg_dump/gzip). Every day at
+# ~02:00 Tehran it dumps the whole database, gzips it, and keeps the last
+# RETAIN_DAYS files in /backups. Designed to be dead-simple and dependency-free:
+# no cron daemon, just sleep-until-next-run so it survives container restarts.
+#
+# Env:
+#   PGHOST, PGUSER, PGPASSWORD, PGDATABASE — connection (from compose)
+#   RETAIN_DAYS — how many daily dumps to keep (default 14)
+#   BACKUP_HOUR — local hour to run, 0-23 (default 2 = 02:00)
+set -eu
+
+RETAIN_DAYS="${RETAIN_DAYS:-14}"
+BACKUP_HOUR="${BACKUP_HOUR:-2}"
+OUT_DIR=/backups
+export TZ="${TZ:-Asia/Tehran}"
+
+log() { echo "[pg-backup $(date '+%Y-%m-%d %H:%M:%S %Z')] $*"; }
+
+# Sanitise the settings up front. A value the shell cannot do arithmetic on
+# (e.g. BACKUP_HOUR=08, an invalid octal literal) would otherwise abort the
+# script under `set -e` right after the boot-time dump, so every container
+# restart would write yet another dump.
+case "$RETAIN_DAYS" in
+  '' | *[!0-9]*)
+    log "WARNING: RETAIN_DAYS='$RETAIN_DAYS' is not a whole number; using 14" >&2
+    RETAIN_DAYS=14
+    ;;
+esac
+case "$BACKUP_HOUR" in
+  [0-9] | 1[0-9] | 2[0-3]) ;;
+  0[0-9]) BACKUP_HOUR=${BACKUP_HOUR#0} ;; # "08" -> "8"
+  *)
+    log "WARNING: BACKUP_HOUR='$BACKUP_HOUR' is not an hour 0-23; using 2" >&2
+    BACKUP_HOUR=2
+    ;;
+esac
+
+# Dump the database to $OUT_DIR/meezi_TIMESTAMP.sql.gz, then rotate old dumps.
+# Returns 0 on success, 1 if the dump failed (existing dumps are left alone).
+run_backup() {
+  ts=$(date '+%Y%m%d_%H%M%S')
+  tmp="$OUT_DIR/.meezi_${ts}.sql.gz.partial"
+  final="$OUT_DIR/meezi_${ts}.sql.gz"
+  # Any .partial still present is debris from a run killed mid-dump; the
+  # rotation pattern below never matches it, so clear it here.
+  rm -f "$OUT_DIR"/.meezi_*.sql.gz.partial
+  log "starting dump → $final"
+  # pg_dump gzips the plain-text dump itself (-Z 9) rather than piping into
+  # gzip: without pipefail, `pg_dump | gzip` reports only gzip's status, so a
+  # failed dump would be renamed into place as if it were a good backup.
+  # Written as .partial then atomically renamed so a crash never leaves a
+  # truncated file that looks like a good backup.
+  if pg_dump --no-owner --no-privileges -Z 9 -f "$tmp" \
+    && [ -s "$tmp" ] \
+    && mv "$tmp" "$final"; then
+    size=$(wc -c < "$final" 2>/dev/null || echo '?')
+    log "done ($size bytes)"
+  else
+    rm -f "$tmp"
+    log "ERROR: dump failed" >&2
+    return 1
+  fi
+  # Rotate: delete dumps older than RETAIN_DAYS days. Only reached after a
+  # successful dump, so a failing database never ages out the last good copies.
+  find "$OUT_DIR" -maxdepth 1 -name 'meezi_*.sql.gz' -mtime "+${RETAIN_DAYS}" -print -delete | while IFS= read -r f; do
+    log "rotated out $f"
+  done
+}
+
+# Print the seconds from now until the next BACKUP_HOUR:00:00 local time
+# (1..86400; being exactly on the hour means "this time tomorrow").
+seconds_until_next_run() {
+  # One date call so all three fields describe the same instant; separate calls
+  # can straddle an hour rollover and schedule the run an hour late.
+  hms=$(date '+%H:%M:%S')
+  now_h=${hms%%:*}
+  now_s=${hms##*:}
+  now_m=${hms#*:}
+  now_m=${now_m%:*}
+  # Strip zero padding by hand: $(( 08 )) is an invalid octal literal, and the
+  # %-H style flags are a non-POSIX strftime extension.
+  now=$(( ${now_h#0} * 3600 + ${now_m#0} * 60 + ${now_s#0} ))
+  target=$(( BACKUP_HOUR * 3600 ))
+  if [ "$now" -lt "$target" ]; then
+    echo $(( target - now ))
+  else
+    echo $(( 86400 - now + target ))
+  fi
+}
+
+log "backup loop started (retain ${RETAIN_DAYS}d, daily at ${BACKUP_HOUR}:00 ${TZ})"
+# Take one backup immediately on first boot so we never sit a full day with none.
+# `|| true`: a failed dump is already logged and must not end the loop.
+run_backup || true
+while true; do
+  wait_s=$(seconds_until_next_run)
+  log "next backup in ${wait_s}s"
+  sleep "$wait_s"
+  run_backup || true
+done