#!/usr/bin/env bash
# Shell options, environment defaults and log file for the installer.
set -euo pipefail

# Defaults; a value already present in the environment wins.
export DEBIAN_FRONTEND="${DEBIAN_FRONTEND:-noninteractive}"
export PYTHONWARNINGS="${PYTHONWARNINGS:-ignore::SyntaxWarning}"

# The script later tees its whole stdout/stderr stream into this file, so it
# is restricted to its owner instead of inheriting the default umask.
log_file="/var/log/rizoma-installer.log"
mkdir -p "$(dirname "$log_file")"
touch "$log_file"
chmod 600 "$log_file"

# Timestamped logging helper.
# Writes "[YYYY-MM-DD HH:MM:SS] <message>" to stdout and appends the same line
# to $log_file.
# NOTE(review): despite the name, nothing here redacts or masks the message;
# callers must not pass secrets. Once stdout itself is redirected through
# `tee -a "$log_file"`, each message reaches the log file twice.
# Arguments: $1 - message text
log_masked() {
  local text="$1"
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$text" | tee -a "$log_file"
}

# Print a line that may contain a secret, preferring the terminal.
# The line is written to /dev/tty when that works; otherwise it falls back to
# stdout.
# Arguments: $1 - text to print
# Returns:   status of the printf that produced the output
print_secret_line() {
  local msg="$1"
  # A `-w /dev/tty` test is not enough: the device node can be writable while
  # the process has no controlling terminal, in which case opening it fails.
  # Attempt the write and fall back instead of aborting under `set -e`.
  if ! { printf '%s\n' "$msg" > /dev/tty; } 2>/dev/null; then
    printf '%s\n' "$msg"
  fi
}

# Emit a random six-digit, zero-padded PIN on stdout.
# Four bytes from /dev/urandom are read as one unsigned decimal integer and
# reduced modulo 1000000.
generate_supervisor_pin() {
  local raw
  raw="$(od -An -N4 -tu4 /dev/urandom)"
  printf '%06d\n' "$(( raw % 1000000 ))"
}

# Redirect stdout/stderr to log, but be careful with sensitive data:
# from this point everything the script prints (stderr merged into stdout)
# is shown on the original stdout and appended to $log_file by tee.
exec > >(tee -a "$log_file") 2>&1

# The installer must run as root; any other uid prints a message and exits 1.
if [ "$(id -u)" -ne 0 ]; then
  echo "run as root"
  exit 1
fi

# Trap to clean up sensitive files on exit.
# cleanup_sensitive is registered for EXIT and therefore runs on every exit
# path. It removes the temporary .pgpass file and installer scratch files in
# /tmp, then unsets the variables that hold credentials.
cleanup_sensitive() {
  # Remove temporary sensitive files
  if [ -n "${PGPASSFILE:-}" ] && [ -f "$PGPASSFILE" ]; then
    rm -f "$PGPASSFILE"
    log_masked "Cleaned up .pgpass file"
  fi
  # Clean up any temporary payload files
  rm -f /tmp/rizoma-user-payload.* 2>/dev/null || true
  rm -f /tmp/rizoma-pgpass.* 2>/dev/null || true
  rm -f /tmp/rizoma-keyring.* 2>/dev/null || true
  # Relay bootstrap API responses carry the registration token, the relay
  # secret and the mesh private key. The bootstrap step deletes them itself,
  # but only when it runs to completion; remove them here on early exits too.
  rm -f /tmp/rizoma-relay-token.* /tmp/rizoma-relay-register.* \
    /tmp/rizoma-relay-meshca.* /tmp/rizoma-relays.* 2>/dev/null || true
  # Clear sensitive variables from environment
  unset admin_password admin_password_confirm db_password api_token supervisor_pin 2>/dev/null || true
  log_masked "Sensitive variables cleared from environment"
}
trap cleanup_sensitive EXIT

# Installer banner, framed by one blank line above and below.
cat <<'BANNER'

  ╔══════════════════════════════════════╗
  ║       Rizoma Platform Installer      ║
  ╚══════════════════════════════════════╝

BANNER

# Resolve a configuration value, prompting on the terminal only when needed.
# Arguments:
#   $1 - prompt label shown by `read -p`
#   $2 - preset value; when non-empty it is returned without prompting
#   $3 - fallback used when the user enters nothing
# Outputs: the chosen value on stdout, followed by a newline
prompt_value() {
  local label="$1"
  local current="$2"
  local fallback="$3"
  local value=""
  if [ -n "$current" ]; then
    # printf rather than echo: echo would treat values such as "-n" or "-e"
    # as options and print nothing.
    printf '%s\n' "$current"
    return
  fi
  read -r -p "$label" value < /dev/tty
  if [ -z "$value" ]; then
    value="$fallback"
  fi
  printf '%s\n' "$value"
}

# Repository/channel defaults and operator identity. Each RIZOMA_* variable,
# when non-empty, is used as-is; otherwise the value is prompted for.
repo_url="${RIZOMA_REPO_URL:-https://repo.rizomasec.ru}"
channel="${RIZOMA_CHANNEL:-stable}"
domain="$(prompt_value "Platform subdomain (e.g., mesh.example.com): " "${RIZOMA_DOMAIN:-}" "")"
admin_email="$(prompt_value "Supervisor email: " "${RIZOMA_ADMIN_EMAIL:-}" "")"
admin_name="$(prompt_value "Supervisor name: " "${RIZOMA_ADMIN_NAME:-}" "")"

# Password must be provided interactively for security (H-06 Fix: environment variable removed)
# Password with confirmation - using read -s to hide input.
# IFS= stops read from trimming leading/trailing whitespace, so the stored
# password is exactly what was typed.
while true; do
  IFS= read -r -s -p "Supervisor password (min 8 chars): " admin_password < /dev/tty
  echo
  IFS= read -r -s -p "Confirm password: " admin_password_confirm < /dev/tty
  echo
  if [ "$admin_password" != "$admin_password_confirm" ]; then
    echo "  Passwords do not match. Try again."
    continue
  fi
  if [ "${#admin_password}" -lt 8 ]; then
    echo "  Password must be at least 8 characters. Try again."
    continue
  fi
  break
done

# Clear password confirmation immediately
unset admin_password_confirm

# --- Validate inputs ---
if [ -z "$domain" ]; then
  echo "ERROR: domain is required"; exit 1
fi
if [ -z "$admin_email" ] || [ -z "$admin_name" ] || [ -z "$admin_password" ]; then
  echo "ERROR: supervisor credentials are required"; exit 1
fi
# Basic shape check only: local-part@host.tld with a TLD of two or more letters.
if ! echo "$admin_email" | grep -qE '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'; then
  echo "ERROR: supervisor email format is invalid"; exit 1
fi

# --- Detect package manager ---
# Candidates are "<binary>:<label>" pairs probed in priority order; the first
# binary found on PATH decides $pkg_manager.
pkg_manager=""
for pkg_probe in apt-get:apt dnf:dnf yum:yum; do
  if command -v "${pkg_probe%%:*}" >/dev/null 2>&1; then
    pkg_manager="${pkg_probe##*:}"
    break
  fi
done
if [ -z "$pkg_manager" ]; then
  echo "ERROR: no supported package manager (apt/dnf/yum)"; exit 1
fi

# Stop and disable UFW and firewalld when their systemd units are installed.
# Does nothing beyond the status line when systemctl is unavailable. Every
# disable step is best-effort and never fails the installer.
disable_conflicting_firewalls() {
  local unit_files
  echo "  Checking for conflicting firewall managers..."

  if ! command -v systemctl >/dev/null 2>&1; then
    return 0
  fi

  # Capture the unit list once and match against the captured text. Piping
  # `systemctl list-unit-files` straight into `grep -q` is unreliable under
  # `set -o pipefail`: grep exits at the first match, the producer can die on
  # SIGPIPE, and the pipeline then reports failure despite the match.
  unit_files="$(systemctl list-unit-files || true)"

  if grep -q '^ufw\.service' <<<"$unit_files"; then
    if systemctl is-active --quiet ufw 2>/dev/null; then
      echo "  Disabling active UFW firewall..."
      ufw --force disable >/dev/null 2>&1 || systemctl disable --now ufw >/dev/null 2>&1 || true
    fi
    systemctl disable ufw >/dev/null 2>&1 || true
  fi

  if grep -q '^firewalld\.service' <<<"$unit_files"; then
    if systemctl is-active --quiet firewalld 2>/dev/null; then
      echo "  Disabling active firewalld firewall..."
      systemctl disable --now firewalld >/dev/null 2>&1 || true
    fi
    systemctl disable firewalld >/dev/null 2>&1 || true
  fi
}

# Download the repository signing key from $gpg_key_url and install it as the
# APT keyring. An ASCII-armored key is converted with `gpg --dearmor`; any
# other content is installed unchanged with mode 0644.
install_apt_keyring() {
  local keyring_dest="/usr/share/keyrings/rizoma-archive-keyring.gpg"
  local downloaded
  downloaded="$(mktemp /tmp/rizoma-keyring.XXXXXX)"
  curl -fsS "$gpg_key_url" -o "$downloaded"
  if ! grep -q "BEGIN PGP PUBLIC KEY BLOCK" "$downloaded"; then
    # No armor header: treat the download as an already-binary keyring.
    install -m 0644 "$downloaded" "$keyring_dest"
  else
    gpg --batch --yes --dearmor -o "$keyring_dest" "$downloaded"
  fi
  rm -f "$downloaded"
}

# Prepare nftables objects and Fail2ban configuration for WAF host bans.
#
# Steps:
#   1. Without `nft`, print a notice and return 0.
#   2. Ensure table `inet rizoma`, the timeout sets f2b_bans / f2b_bans6 and
#      the chains f2b_check, policy_input and policy_output exist, then
#      rebuild f2b_check so banned sources are dropped on TCP 80/443 only.
#   3. Without `fail2ban-client`, print a notice and return 0.
#   4. Write the Fail2ban action, filter and jail files, and enable/reload
#      Fail2ban if `fail2ban-client -d` accepts the configuration.
#
# All nft/systemctl calls are deliberately best-effort (`|| true`).
# Returns: 0 on every path visible here.
setup_platform_firewall_enforcement() {
  echo "  Preparing WAF host-firewall enforcement..."

  if ! command -v nft >/dev/null 2>&1; then
    echo "  nftables unavailable; WAF will still block HTTP requests but host bans are disabled"
    return 0
  fi

  systemctl enable --now nftables >/dev/null 2>&1 || true
  # Each object is created only when `nft list` cannot find it.
  nft list table inet rizoma >/dev/null 2>&1 || nft add table inet rizoma >/dev/null 2>&1 || true
  nft list set inet rizoma f2b_bans >/dev/null 2>&1 || \
    nft add set inet rizoma f2b_bans '{ type ipv4_addr; flags timeout; size 1000000; }' >/dev/null 2>&1 || true
  nft list set inet rizoma f2b_bans6 >/dev/null 2>&1 || \
    nft add set inet rizoma f2b_bans6 '{ type ipv6_addr; flags timeout; size 1000000; }' >/dev/null 2>&1 || true
  nft list chain inet rizoma f2b_check >/dev/null 2>&1 || \
    nft add chain inet rizoma f2b_check '{ type filter hook input priority -50; policy accept; }' >/dev/null 2>&1 || true
  nft list chain inet rizoma policy_input >/dev/null 2>&1 || \
    nft add chain inet rizoma policy_input '{ type filter hook input priority 0; policy accept; }' >/dev/null 2>&1 || true
  nft list chain inet rizoma policy_output >/dev/null 2>&1 || \
    nft add chain inet rizoma policy_output '{ type filter hook output priority 0; policy accept; }' >/dev/null 2>&1 || true
  # WAF bans must not block SSH or other operator recovery paths. Keep the
  # Fail2ban set reusable, but scope this chain to public HTTP/HTTPS ingress.
  nft flush chain inet rizoma f2b_check >/dev/null 2>&1 || true
  nft add rule inet rizoma f2b_check ip saddr @f2b_bans tcp dport 80 drop >/dev/null 2>&1 || true
  nft add rule inet rizoma f2b_check ip saddr @f2b_bans tcp dport 443 drop >/dev/null 2>&1 || true
  nft add rule inet rizoma f2b_check ip6 saddr @f2b_bans6 tcp dport 80 drop >/dev/null 2>&1 || true
  nft add rule inet rizoma f2b_check ip6 saddr @f2b_bans6 tcp dport 443 drop >/dev/null 2>&1 || true

  if ! command -v fail2ban-client >/dev/null 2>&1; then
    echo "  fail2ban unavailable; WAF host-ban escalation is disabled"
    return 0
  fi

  mkdir -p /etc/fail2ban/action.d /etc/fail2ban/filter.d /etc/fail2ban/jail.d
  # Fail2ban has no actionban6/actionunban6 options. Per-family behavior is
  # expressed with a conditional [Init?family=inet6] section, so the target
  # set is the <addr_set> parameter: f2b_bans for IPv4, f2b_bans6 for IPv6.
  cat > /etc/fail2ban/action.d/nftables-rizoma-ban.conf <<'EOF'
# Rizoma Fail2ban nftables action
# Generated by the Rizoma platform installer.
# The inet rizoma f2b_check chain scopes these sets to HTTP/HTTPS only.

[Definition]
actionstart =
actionstop =
actioncheck =
actionban = nft add element inet rizoma <addr_set> { <ip> timeout <bantime>s } 2>/dev/null || true
actionunban = nft delete element inet rizoma <addr_set> { <ip> } 2>/dev/null || true

[Init]
addr_set = f2b_bans

[Init?family=inet6]
addr_set = f2b_bans6
EOF

  cat > /etc/fail2ban/filter.d/rizoma-waf.conf <<'EOF'
# Rizoma WAF block filter
# Generated by the Rizoma platform installer.

[Definition]
failregex = security_event event=waf_block ip=<HOST>.*host_ban=true.*component=waf
            security_event event=waf_pressure_block ip=<HOST>.*component=waf
ignoreregex =
EOF

  cat > /etc/fail2ban/jail.d/rizoma-waf.local <<'EOF'
# Rizoma WAF jails
# Generated by the Rizoma platform installer.

[rizoma-ingress-waf]
enabled = true
port = http,https
filter = rizoma-waf
backend = systemd
journalmatch = _SYSTEMD_UNIT=rizoma-ingress.service + SYSLOG_IDENTIFIER=rizoma-ingress
maxretry = 3
findtime = 600
bantime = 600
ignoreip = 127.0.0.1/8 ::1 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 100.64.0.0/10 169.254.0.0/16 fc00::/7 fe80::/10
action = nftables-rizoma-ban
EOF

  # Only (re)start Fail2ban when it can parse the configuration just written.
  if fail2ban-client -d >/dev/null 2>&1; then
    systemctl enable --now fail2ban >/dev/null 2>&1 || true
    fail2ban-client reload >/dev/null 2>&1 || true
    echo "  WAF host-firewall enforcement ready"
  else
    echo "  fail2ban config validation failed; WAF host-ban escalation is disabled"
  fi
}

# --- Validate DNS ---
# The domain must resolve to this host's public IP as reported by
# api.ipify.org; afterwards the ingress bind address is derived and checked.
echo "[1/8] Validating DNS..."
public_ip="$(curl -fsS --max-time 15 https://api.ipify.org || true)"
if [ -z "$public_ip" ]; then
  echo "ERROR: could not determine the public IP via https://api.ipify.org"
  exit 1
fi
# getent exits non-zero for a name that does not resolve. Under pipefail that
# would abort the script silently here, so tolerate it and let the comparison
# below print the explanatory error instead.
dns_ips="$(getent ahosts "$domain" | awk '{print $1}' | sort -u || true)"
# -F compares the address literally; as a regex, "." would match any character.
if ! grep -qxF -- "$public_ip" <<<"$dns_ips"; then
  echo "ERROR: DNS for $domain does not resolve to $public_ip"
  echo "  Resolved: $dns_ips"
  echo "  Expected: $public_ip"
  exit 1
fi
echo "  DNS OK: $domain -> $public_ip"

ingress_bind_ip="${RIZOMA_INGRESS_BIND_IP:-$public_ip}"
ingress_acme_cache="${RIZOMA_INGRESS_ACME_CACHE:-/var/lib/rizoma/acme/ingress}"
# Wildcard binds are rejected: the ingress edge needs one concrete address.
if [ -z "$ingress_bind_ip" ] || [ "$ingress_bind_ip" = "0.0.0.0" ] || [ "$ingress_bind_ip" = "::" ] || [ "$ingress_bind_ip" = "[::]" ]; then
  echo "ERROR: ingress edge requires an explicit bind IP"
  echo "  Set RIZOMA_INGRESS_BIND_IP to the interface address that should serve HTTPS."
  exit 1
fi
# Strip optional surrounding brackets from an IPv6 literal.
ingress_bind_addr="$(printf '%s' "$ingress_bind_ip" | sed 's/^\[//; s/\]$//')"
case "$ingress_bind_addr" in
  127.*|::1|localhost)
    echo "ERROR: ingress edge bind IP must be reachable from the public Internet"
    echo "  Got loopback address: $ingress_bind_ip"
    exit 1
    ;;
esac
# When `ip` is available, require the address to be assigned to a local interface.
if command -v ip >/dev/null 2>&1; then
  if ! ip -o addr show | awk '{print $4}' | cut -d/ -f1 | grep -qxF "$ingress_bind_addr"; then
    echo "ERROR: ingress edge bind IP is not assigned to this host: $ingress_bind_ip"
    echo "  Set RIZOMA_INGRESS_BIND_IP to a local interface address that receives traffic for $domain."
    exit 1
  fi
fi
# Addresses containing ":" (IPv6) are re-bracketed for host:port use.
ingress_listen_host="$ingress_bind_addr"
ingress_resolve_ip="$ingress_bind_addr"
case "$ingress_bind_addr" in
  *:*)
    ingress_listen_host="[$ingress_bind_addr]"
    ingress_resolve_ip="[$ingress_bind_addr]"
    ;;
esac

# --- Prepare ingress edge ---
echo "[2/8] Preparing ingress edge..."
# Best-effort: stop and disable caddy so it is not running when ingress starts.
if command -v systemctl >/dev/null 2>&1; then
  systemctl disable --now caddy >/dev/null 2>&1 || true
fi
echo "  Ingress bind: $ingress_listen_host:443"
echo "  Ingress ACME/redirect: $ingress_listen_host:80"

# --- Install Rizoma packages ---
echo "[3/8] Installing Rizoma packages..."
gpg_key_url="${RIZOMA_GPG_KEY_URL:-$repo_url/keys/rizoma-archive-keyring.gpg}"

# Stop services if running from a previous install
systemctl stop rizoma-dashboard >/dev/null 2>&1 || true
systemctl stop rizoma-coordinator >/dev/null 2>&1 || true
systemctl stop rizoma-relay >/dev/null 2>&1 || true
systemctl stop rizoma-ingress >/dev/null 2>&1 || true

disable_conflicting_firewalls

if [ "$pkg_manager" = "apt" ]; then
  # Refresh the package index before the first install; on a fresh image the
  # lists can be empty or stale and the prerequisite install would fail.
  apt-get update -qq
  apt-get install -y -qq curl ca-certificates gnupg
  mkdir -p /usr/share/keyrings
  install_apt_keyring
  echo "deb [signed-by=/usr/share/keyrings/rizoma-archive-keyring.gpg] $repo_url/apt/$channel ./" > /etc/apt/sources.list.d/rizoma.list
  apt-get update -qq
  apt-get install -y -qq jq openssl postgresql nftables rizoma-coordinator rizoma-dashboard rizoma-relay rizoma-ingress
  # fail2ban is optional; its absence only disables host-ban escalation.
  apt-get install -y -qq fail2ban >/dev/null 2>&1 || echo "  fail2ban package unavailable; WAF host-ban escalation will be disabled"
else
  dnf -y install curl ca-certificates jq openssl postgresql-server postgresql nftables rizoma-coordinator rizoma-dashboard rizoma-relay rizoma-ingress 2>/dev/null || \
  yum -y install curl ca-certificates jq openssl postgresql-server postgresql nftables rizoma-coordinator rizoma-dashboard rizoma-relay rizoma-ingress
  dnf -y install fail2ban >/dev/null 2>&1 || yum -y install fail2ban >/dev/null 2>&1 || echo "  fail2ban package unavailable; WAF host-ban escalation will be disabled"
  # DNF/YUM repo setup would go here for RPM
fi
systemctl enable --now nftables >/dev/null 2>&1 || true
setup_platform_firewall_enforcement
echo "  Packages installed"

# --- Create system users and groups ---
echo "[4/8] Creating system users..."

# One system group and one login-less system user per service. Anything that
# already exists is left untouched; creation failures are ignored here.
for svc in coordinator relay dashboard ingress; do
  getent group "rizoma-$svc" >/dev/null 2>&1 \
    || groupadd --system "rizoma-$svc" 2>/dev/null \
    || true
  getent passwd "rizoma-$svc" >/dev/null 2>&1 \
    || useradd --system --no-create-home --shell /usr/sbin/nologin \
      --gid "rizoma-$svc" \
      --home-dir "/var/lib/rizoma/$svc" \
      "rizoma-$svc" 2>/dev/null \
    || true
done

# Shared base directories and their modes.
mkdir -p /etc/rizoma /var/lib/rizoma /var/log/rizoma
chmod 711 /etc/rizoma
chmod 755 /var/lib/rizoma /var/log/rizoma

# Per-service state and log directories, owned by the service account.
for svc in coordinator relay dashboard ingress; do
  svc_state_dir="/var/lib/rizoma/$svc"
  svc_log_dir="/var/log/rizoma/$svc"
  mkdir -p "$svc_state_dir" "$svc_log_dir"
  chown -R "rizoma-$svc:rizoma-$svc" "$svc_state_dir" "$svc_log_dir"
  chmod 750 "$svc_state_dir" "$svc_log_dir"
done

# ACME cache for the ingress service; accessible to its owner only.
mkdir -p "$ingress_acme_cache"
chown -R rizoma-ingress:rizoma-ingress "$ingress_acme_cache"
chmod 700 "$ingress_acme_cache"

echo "  System users created"

# --- Setup PostgreSQL ---
echo "[5/8] Setting up PostgreSQL..."
# postgresql-setup exists only on some distributions; initdb is best-effort
# (it fails harmlessly when a cluster is already initialised).
if command -v postgresql-setup >/dev/null 2>&1; then
  postgresql-setup --initdb 2>/dev/null || true
fi

# Start PostgreSQL - handle both Debian/Ubuntu (version-specific) and RHEL/CentOS.
# The unit list is captured first: piping it into `grep -q` under pipefail
# can report failure even when the unit is present (producer killed by SIGPIPE).
pg_unit_files="$(systemctl list-unit-files || true)"
if grep -q 'postgresql@.*service' <<<"$pg_unit_files"; then
  # Debian/Ubuntu: Start the version-specific service.
  # Take the first directory under /etc/postgresql as the version. A glob is
  # used instead of `ls | head` so a missing directory yields an empty version
  # rather than a pipefail abort.
  PG_VERSION=""
  for pg_conf_dir in /etc/postgresql/*/; do
    if [ -d "$pg_conf_dir" ]; then
      PG_VERSION="$(basename "$pg_conf_dir")"
      break
    fi
  done
  if [ -n "$PG_VERSION" ]; then
    echo "  Starting PostgreSQL $PG_VERSION..."
    systemctl enable --now "postgresql@${PG_VERSION}-main" 2>/dev/null || \
    systemctl enable --now postgresql 2>/dev/null || true
  else
    systemctl enable --now postgresql
  fi
else
  # RHEL/CentOS/Fedora
  systemctl enable --now postgresql
fi

# Wait for PostgreSQL to actually accept connections (up to 30 one-second tries).
echo "  Waiting for PostgreSQL to accept connections..."
pg_ready=false
for i in $(seq 1 30); do
  if sudo -u postgres psql -tAc "SELECT 1" >/dev/null 2>&1; then
    echo "  PostgreSQL ready (attempt $i/30)"
    pg_ready=true
    break
  fi
  if [ $((i % 5)) -eq 0 ]; then
    echo "  Still waiting for PostgreSQL... (attempt $i/30)"
    # Check if PostgreSQL process is actually running
    if ! pgrep -x postgres >/dev/null 2>&1; then
      echo "  ERROR: PostgreSQL process is not running!"
      echo "  Check PostgreSQL logs: journalctl -u postgresql* --no-pager -n 20"
      exit 1
    fi
  fi
  sleep 1
done

if [ "$pg_ready" != "true" ]; then
  echo "  ERROR: PostgreSQL not ready after 30 seconds"
  echo "  Check PostgreSQL status: systemctl status postgresql*"
  echo "  Check PostgreSQL logs: journalctl -u postgresql* --no-pager -n 30"
  exit 1
fi

# Database identity. The password defaults to 24 random bytes, hex-encoded.
db_user="${RIZOMA_DB_USER:-rizoma_app}"
db_name="${RIZOMA_DB_NAME:-rizoma_coordinator}"
db_password="${RIZOMA_DB_PASSWORD:-$(openssl rand -hex 24)}"

# Create .pgpass file for secure PostgreSQL authentication
# Format: hostname:port:database:username:password
PGPASSFILE="$(mktemp /tmp/rizoma-pgpass.XXXXXX)"
chmod 600 "$PGPASSFILE"
echo "127.0.0.1:5432:$db_name:$db_user:$db_password" > "$PGPASSFILE"
echo "localhost:5432:$db_name:$db_user:$db_password" >> "$PGPASSFILE"
export PGPASSFILE

log_masked "PostgreSQL .pgpass file created at $PGPASSFILE"

db_url="postgres://$db_user:$db_password@127.0.0.1:5432/$db_name?sslmode=disable"

# A DB URL already present in coordinator.env (previous install) takes
# precedence over the freshly generated one.
if [ -f /etc/rizoma/coordinator.env ]; then
  existing_db_url="$(grep -E '^RIZOMA_COORDINATOR_DB_URL=' /etc/rizoma/coordinator.env | sed 's/^RIZOMA_COORDINATOR_DB_URL=//' || true)"
  if [ -n "$existing_db_url" ]; then
    db_url="$existing_db_url"
  fi
fi

# Provision the role and database only when the generated URL is in use.
# Statements that contain the password are sent to psql on stdin, not with
# -c, so the password never appears in a process argument list.
# ON_ERROR_STOP makes psql exit non-zero when such a statement fails.
if [ "$db_url" = "postgres://$db_user:$db_password@127.0.0.1:5432/$db_name?sslmode=disable" ]; then
  echo "  Creating database user and database..."

  # Double any single quote so the password is a valid SQL string literal.
  sql_quote="'"
  db_password_sql="${db_password//$sql_quote/$sql_quote$sql_quote}"

  # Check if user exists
  user_exists="$(PGPASSFILE="$PGPASSFILE" sudo -u postgres psql -tAc "SELECT 1 FROM pg_roles WHERE rolname='$db_user'" 2>/dev/null || echo "")"

  if [ "$user_exists" != "1" ]; then
    echo "  Creating user: $db_user"
    if ! PGPASSFILE="$PGPASSFILE" sudo -u postgres psql -v ON_ERROR_STOP=1 2>/dev/null \
        <<<"CREATE ROLE $db_user WITH LOGIN PASSWORD '$db_password_sql';"; then
      echo "  ERROR: Failed to create database user"
      exit 1
    fi
  else
    echo "  User already exists, updating password"
    PGPASSFILE="$PGPASSFILE" sudo -u postgres psql -v ON_ERROR_STOP=1 2>/dev/null \
      <<<"ALTER ROLE $db_user WITH PASSWORD '$db_password_sql';" || true
  fi
  unset db_password_sql

  # Check if database exists
  db_exists="$(PGPASSFILE="$PGPASSFILE" sudo -u postgres psql -tAc "SELECT 1 FROM pg_database WHERE datname='$db_name'" 2>/dev/null || echo "")"

  if [ "$db_exists" != "1" ]; then
    echo "  Creating database: $db_name"
    PGPASSFILE="$PGPASSFILE" sudo -u postgres psql -c "CREATE DATABASE $db_name OWNER $db_user;" 2>/dev/null || \
    {
      echo "  ERROR: Failed to create database"
      exit 1
    }
  else
    echo "  Database already exists"
  fi
fi

log_masked "PostgreSQL database and user created"
echo "  PostgreSQL ready"

# --- Configure and start services ---
echo "[6/8] Configuring services..."
# Set ownership BEFORE creating files - CRITICAL for coordinator to write SQLite DB
# (Directories already created and owned in step 4)
chmod 711 /etc/rizoma

# API token: taken from the environment, else a scope prefix plus 32 random
# bytes in hex.
api_token="${RIZOMA_COORDINATOR_API_TOKEN:-supervisor+admin+read+write+heartbeat:$(openssl rand -hex 32)}"

log_masked "API token generated (will be displayed once at the end)"

# Pre-flight port conflict checks - CRITICAL to prevent silent failures
echo "  Checking for port conflicts..."
port_conflict=false

# Loopback service ports, as "<port>:<service>" pairs. A port is considered
# taken when `ss -tlnp` shows a listener on ":<port> ".
for port_spec in "8443:coordinator" "8080:dashboard" "8082:relay"; do
  port_num="${port_spec%%:*}"
  port_owner="${port_spec##*:}"
  if ss -tlnp | grep -q ":${port_num} "; then
    echo "  ERROR: Port $port_num ($port_owner) is already in use"
    echo "  Conflicting process: $(ss -tlnp | grep ":${port_num} ")"
    port_conflict=true
  fi
done

# Public ingress ports are checked together.
if ss -tlnp | grep -qE ':(80|443) '; then
  echo "  ERROR: Port 80 or 443 (ingress edge) is already in use"
  echo "  Conflicting process: $(ss -tlnp | grep -E ':(80|443) ')"
  echo "  Stop the conflicting service before installing the ingress edge."
  port_conflict=true
fi

if [ "$port_conflict" = true ]; then
  cat <<EOF

  ERROR: Port conflicts detected. Installation cannot proceed.
  Stop the conflicting services or set environment variables:
    RIZOMA_COORDINATOR_LISTEN (default: 127.0.0.1:8443)
    RIZOMA_DASHBOARD_LISTEN (default: 127.0.0.1:8080)
    RIZOMA_RELAY_LISTEN (default: 127.0.0.1:8082)
    RIZOMA_INGRESS_BIND_IP (default: $public_ip)
EOF
  exit 1
fi

echo "  All ports available"

# The env files below contain the API token and the database URL. Create them
# under a restrictive umask so they are never group/world-readable, not even
# between creation and the chmod further down. The previous umask is restored
# once the files are written.
env_prev_umask="$(umask)"
umask 077

# Coordinator listens on localhost; rizoma-ingress owns public HTTP/HTTPS.
cat > /etc/rizoma/coordinator.env <<EOF
RIZOMA_COORDINATOR_LISTEN=127.0.0.1:8443
RIZOMA_COORDINATOR_DB_URL=$db_url
RIZOMA_COORDINATOR_API_TOKEN=$api_token
RIZOMA_COORDINATOR_INSECURE_HTTP=true
RIZOMA_COORDINATOR_RELAYS=$domain:4243
RIZOMA_COORDINATOR_STORE=/var/lib/rizoma/coordinator/coordinator.db
RIZOMA_COORDINATOR_IDENTITY=/var/lib/rizoma/coordinator/remote_ssh_mesh_identity.json
RIZOMA_COORDINATOR_FIREWALL_MODE=off
RIZOMA_INGRESS_PUBLIC_ADDRESSES=$public_ip
EOF

# Dashboard is a loopback app; ingress publishes it as the public platform UI.
cat > /etc/rizoma/dashboard.env <<EOF
RIZOMA_DASHBOARD_LISTEN=127.0.0.1:8080
RIZOMA_DASHBOARD_COORDINATOR_URL=http://127.0.0.1:8443
RIZOMA_DASHBOARD_API_TOKEN=$api_token
RIZOMA_DASHBOARD_INSECURE_HTTP=true
EOF

# Ingress edge: public listeners, ACME settings and coordinator access.
cat > /etc/rizoma/ingress.env <<EOF
RIZOMA_INGRESS_LISTEN=$ingress_listen_host:443
RIZOMA_INGRESS_ACME_HTTP=$ingress_listen_host:80
RIZOMA_INGRESS_ACME_EMAIL=$admin_email
RIZOMA_INGRESS_ACME_CACHE=$ingress_acme_cache
RIZOMA_INGRESS_COORDINATOR_URL=http://127.0.0.1:8443
RIZOMA_INGRESS_API_TOKEN=$api_token
RIZOMA_INGRESS_TOKEN=$api_token
RIZOMA_INGRESS_NAME=platform-ingress
RIZOMA_INGRESS_NODE_KIND=platform
RIZOMA_INGRESS_IDENTITY=/var/lib/rizoma/ingress/identity.json
RIZOMA_INGRESS_INSECURE_HTTP=true
RIZOMA_INGRESS_FIREWALL_MODE=off
RIZOMA_INGRESS_PUBLIC_ADDRESSES=$public_ip
EOF

umask "$env_prev_umask"

# Set restrictive permissions on env files (also covers files that already
# existed with wider modes) and hand each one to its service account.
chmod 600 /etc/rizoma/coordinator.env /etc/rizoma/dashboard.env /etc/rizoma/ingress.env
chown rizoma-coordinator:rizoma-coordinator /etc/rizoma/coordinator.env
chown rizoma-dashboard:rizoma-dashboard /etc/rizoma/dashboard.env
chown rizoma-ingress:rizoma-ingress /etc/rizoma/ingress.env

log_masked "Service configuration files created with restrictive permissions"

# Start coordinator first.
# Before enabling it, poll once per second (30 tries) until a trivial query
# against $db_name succeeds.
echo "  Waiting for PostgreSQL to accept connections..."
pg_ready=false
for ((i = 1; i <= 30; i++)); do
  if PGPASSFILE="$PGPASSFILE" sudo -u postgres psql -d "$db_name" -tAc "SELECT 1" >/dev/null 2>&1; then
    echo "  PostgreSQL ready (attempt $i/30)"
    pg_ready=true
    break
  fi
  if (( i % 5 == 0 )); then
    echo "  Still waiting for PostgreSQL... (attempt $i/30)"
  fi
  sleep 1
done

if [ "$pg_ready" != "true" ]; then
  printf '%s\n' \
    "  ERROR: PostgreSQL not ready after 30 seconds" \
    "  Check PostgreSQL status: systemctl status postgresql" \
    "  Check PostgreSQL logs: journalctl -u postgresql"
  exit 1
fi

systemctl enable --now rizoma-coordinator

# Wait for coordinator to be ready: probe /healthz every 2 seconds, 60 tries.
echo "  Waiting for coordinator to start..."
coordinator_ready=false
for ((i = 1; i <= 60; i++)); do
  if curl -fsS --max-time 3 "http://127.0.0.1:8443/healthz" >/dev/null 2>&1; then
    echo "  Coordinator ready (attempt $i/60)"
    coordinator_ready=true
    break
  fi
  if (( i % 10 == 0 )); then
    echo "  Still waiting for coordinator... (attempt $i/60)"
  fi
  sleep 2
done

# On timeout, dump recent logs and unit status, list likely causes, and abort.
if [ "$coordinator_ready" != "true" ]; then
  echo "  ERROR: Coordinator failed to start after 120 seconds"
  echo "  Diagnosing issue..."
  echo "  --- Last 30 lines of coordinator logs ---"
  journalctl -u rizoma-coordinator --no-pager -n 30 || true
  echo "  --- Coordinator service status ---"
  systemctl status rizoma-coordinator --no-pager -l || true
  cat <<'EOF'

  Common issues:
  1. PostgreSQL connection failed - check DB URL in /etc/rizoma/coordinator.env
  2. Port 8443 already in use - check: ss -tlnp | grep 8443
  3. Permission denied - check: ls -la /etc/rizoma/coordinator.env
EOF
  exit 1
fi

# Relay bootstrap: create a real relay record, persist relay identity, then
# issue a mesh certificate bound to the registered relay ID for mTLS heartbeats.
#
# Flow: delete relay records that already use this public address, create a
# bootstrap relay token, register the relay, write the relay state file,
# request a mesh certificate (up to 10 tries), and store CA/cert/key under
# /etc/rizoma. Failures are reported and leave relay_bootstrap_ok=false; they
# do not abort the installer.
echo "  Bootstrapping relay..."
relay_public_addr="${RIZOMA_RELAY_QUIC_PUBLIC_ADDR:-$domain:4243}"
relay_quic_listen="${RIZOMA_RELAY_QUIC_LISTEN:-0.0.0.0:4243}"
relay_region="${RIZOMA_RELAY_RELAY_REGION:-local}"
relay_max_connections="${RIZOMA_RELAY_MAX_CONNECTIONS:-10000}"
relay_state_path="${RIZOMA_RELAY_RELAY_STATE:-/var/lib/rizoma/relay/relay.state}"
relay_bootstrap_ok=false

if [ "$coordinator_ready" != "true" ]; then
  echo "  ERROR: Skipping relay bootstrap - coordinator is not running"
else
  # API responses contain the registration token, relay secret and mesh key.
  # mktemp gives each response file an unpredictable name and mode 0600
  # instead of a fixed, guessable path in the world-writable /tmp.
  relay_token_resp="$(mktemp /tmp/rizoma-relay-token.XXXXXX)"
  relay_register_resp="$(mktemp /tmp/rizoma-relay-register.XXXXXX)"
  relay_meshca_resp="$(mktemp /tmp/rizoma-relay-meshca.XXXXXX)"
  relay_list_resp="$(mktemp /tmp/rizoma-relays.XXXXXX)"

  echo "  Reconciling local relay identity..."
  rm -f "$relay_state_path" 2>/dev/null || true
  list_relays_code=$(curl -s -w "%{http_code}" --max-time 15 \
    -H "Authorization: Bearer $api_token" \
    -o "$relay_list_resp" \
    "http://127.0.0.1:8443/v1/relays" || echo "000")
  if [ "$list_relays_code" = "200" ]; then
    # Delete every existing relay record registered with the same public address.
    while IFS= read -r existing_relay_id; do
      if [ -n "$existing_relay_id" ] && [ "$existing_relay_id" != "null" ]; then
        delete_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 15 \
          -H "Authorization: Bearer $api_token" \
          "http://127.0.0.1:8443/v1/relays/$existing_relay_id" -X DELETE || echo "000")
        log_masked "Removed stale local relay record $existing_relay_id before bootstrap (HTTP $delete_code)"
      fi
    done < <(jq -r --arg public_addr "$relay_public_addr" '.[]? | select(.public_addr == $public_addr) | .id' "$relay_list_resp" 2>/dev/null || true)
  else
    log_masked "Relay reconciliation skipped: list relays returned HTTP $list_relays_code"
  fi

  echo "  Creating bootstrap relay token..."
  create_token_code=$(curl -s -w "%{http_code}" --max-time 15 \
    -H "Authorization: Bearer $api_token" \
    -H "Content-Type: application/json" \
    -d "{\"name\":\"bootstrap-relay\",\"region\":\"$relay_region\",\"max_connections\":$relay_max_connections,\"ttl_hours\":24}" \
    -o "$relay_token_resp" \
    "http://127.0.0.1:8443/v1/relay-tokens" -X POST || echo "000")

  if [ "$create_token_code" = "201" ] || [ "$create_token_code" = "200" ]; then
    relay_reg_token="$(jq -r '.token // empty' "$relay_token_resp" 2>/dev/null || echo "")"
  else
    relay_reg_token=""
  fi

  if [ -z "$relay_reg_token" ]; then
    echo "  ERROR: Failed to create relay bootstrap token (HTTP $create_token_code)"
    echo "  Response: $(cat "$relay_token_resp" 2>/dev/null || echo "no response")"
    log_masked "Relay token creation failed (HTTP $create_token_code)"
  else
    echo "  Registering relay with coordinator..."
    register_code=$(curl -s -w "%{http_code}" --max-time 15 \
      -H "Content-Type: application/json" \
      -d "{\"token\":\"$relay_reg_token\",\"public_addr\":\"$relay_public_addr\",\"region\":\"$relay_region\",\"version\":\"installer-bootstrap\",\"max_connections\":$relay_max_connections}" \
      -o "$relay_register_resp" \
      "http://127.0.0.1:8443/v1/relay/register" -X POST || echo "000")

    relay_id=""
    relay_secret=""
    if [ "$register_code" = "201" ] || [ "$register_code" = "200" ]; then
      relay_id="$(jq -r '.relay_id // empty' "$relay_register_resp" 2>/dev/null || echo "")"
      relay_secret="$(jq -r '.relay_secret // empty' "$relay_register_resp" 2>/dev/null || echo "")"
    fi

    if [ -z "$relay_id" ] || [ -z "$relay_secret" ]; then
      echo "  ERROR: Failed to register relay (HTTP $register_code)"
      echo "  Response: $(cat "$relay_register_resp" 2>/dev/null || echo "no response")"
      log_masked "Relay registration failed (HTTP $register_code)"
    else
      echo "  Persisting relay identity..."
      mkdir -p "$(dirname "$relay_state_path")"
      # Subshell with umask 077: the state file holds the relay secret and
      # must not be readable by others before the chmod below.
      (
        umask 077
        cat > "$relay_state_path" <<EOF
{
  "relay_id": "$relay_id",
  "relay_secret": "$relay_secret"
}
EOF
      )
      chmod 600 "$relay_state_path"
      chown rizoma-relay:rizoma-relay "$relay_state_path"

      echo "  Issuing relay mesh certificate for $relay_id..."
      meshca_code="000"
      for attempt in $(seq 1 10); do
        meshca_code=$(curl -s -w "%{http_code}" --max-time 15 \
          -H "Authorization: Bearer $api_token" \
          -H "Content-Type: application/json" \
          -d "{\"name\":\"$relay_id\",\"ip\":\"\"}" \
          -o "$relay_meshca_resp" \
          "http://127.0.0.1:8443/v1/mesh-ca" -X POST || echo "000")

        # Success requires HTTP 200 and non-empty cert and key fields.
        if [ "$meshca_code" = "200" ] && jq -e '.cert != null and .cert != "" and .key != null and .key != ""' "$relay_meshca_resp" >/dev/null 2>&1; then
          break
        fi

        # 400/401/403 are treated as permanent rejections.
        if [ "$meshca_code" = "401" ] || [ "$meshca_code" = "403" ] || [ "$meshca_code" = "400" ]; then
          echo "  Relay certificate request rejected (HTTP $meshca_code), not retrying"
          break
        fi

        echo "  Relay certificate not ready yet (attempt $attempt/10, HTTP $meshca_code)"
        sleep 2
      done

      if [ "$meshca_code" = "200" ] && jq -e '.cert != null and .cert != "" and .key != null and .key != ""' "$relay_meshca_resp" >/dev/null 2>&1; then
        # Same umask precaution for the certificate material, including the key.
        (
          umask 077
          jq -r '.ca_cert' "$relay_meshca_resp" > /etc/rizoma/mesh_ca.pem
          jq -r '.cert' "$relay_meshca_resp" > /etc/rizoma/relay_cert.pem
          jq -r '.key' "$relay_meshca_resp" > /etc/rizoma/relay_key.pem
        )
        chmod 600 /etc/rizoma/mesh_ca.pem /etc/rizoma/relay_cert.pem /etc/rizoma/relay_key.pem
        chown rizoma-relay:rizoma-relay /etc/rizoma/mesh_ca.pem /etc/rizoma/relay_cert.pem /etc/rizoma/relay_key.pem
        relay_bootstrap_ok=true
        log_masked "Relay bootstrap completed for relay ID: $relay_id"
      else
        echo "  ERROR: Relay certificate issuance failed (HTTP $meshca_code)"
        echo "  Response: $(cat "$relay_meshca_resp" 2>/dev/null || echo "no response")"
        echo "  Hint: inspect coordinator logs for mesh-ca/auth errors: journalctl -u rizoma-coordinator -n 100"
        log_masked "Relay mesh cert issuance failed (HTTP $meshca_code)"
      fi
    fi
  fi

  rm -f "$relay_token_resp" "$relay_register_resp" "$relay_meshca_resp" "$relay_list_resp"
fi

# Relay service environment file. The base settings are always written; the
# three mesh certificate paths are added only after a successful bootstrap.
{
  cat <<EOF
RIZOMA_RELAY_LISTEN=127.0.0.1:8082
RIZOMA_RELAY_INSECURE_HTTP=true
RIZOMA_RELAY_COORDINATOR_URL=http://127.0.0.1:8443
RIZOMA_RELAY_RELAY_STATE=$relay_state_path
RIZOMA_RELAY_RELAY_REGION=$relay_region
RIZOMA_RELAY_QUIC_LISTEN=$relay_quic_listen
RIZOMA_RELAY_QUIC_PUBLIC_ADDR=$relay_public_addr
RIZOMA_RELAY_MAX_CONNECTIONS=$relay_max_connections
RIZOMA_RELAY_FIREWALL_MODE=off
EOF
  if [ "$relay_bootstrap_ok" = true ]; then
    cat <<EOF
RIZOMA_RELAY_MESH_CA_CERT=/etc/rizoma/mesh_ca.pem
RIZOMA_RELAY_MESH_CERT=/etc/rizoma/relay_cert.pem
RIZOMA_RELAY_MESH_KEY=/etc/rizoma/relay_key.pem
EOF
  fi
} > /etc/rizoma/relay.env

# Report which variant was written.
case "$relay_bootstrap_ok" in
  true)
    echo "  Relay configured with registered identity and mesh certificates"
    ;;
  *)
    echo "  WARNING: Relay bootstrap incomplete. Relay service may start without working heartbeats."
    echo "  Check coordinator logs: journalctl -u rizoma-coordinator"
    ;;
esac

chmod 600 /etc/rizoma/relay.env
chown rizoma-relay:rizoma-relay /etc/rizoma/relay.env

# Start dashboard and relay, then publish the dashboard through ingress by
# creating (or updating) an ingress service whose public_host is $domain.
systemctl enable --now rizoma-dashboard
systemctl enable --now rizoma-relay
echo "  Publishing dashboard through ingress..."
# Build the payload with jq so values are JSON-escaped correctly.
dashboard_service_payload="$(jq -nc \
  --arg name "Rizoma Dashboard" \
  --arg public_host "$domain" \
  '{
    name: $name,
    mode: "http",
    upstream_protocol: "http",
    public_host: $public_host,
    path: "/",
    path_match_type: "prefix",
    enabled: true,
    target: {type: "direct_host", ip: "127.0.0.1", port: 8080},
    protection: {mode: "none"},
    pass_host_header: true,
    rewrite_redirects: false
  }')"
# A failed listing falls back to "[]", i.e. "no existing service".
dashboard_services_json="$(curl -fsS --max-time 15 \
  -H "Authorization: Bearer $api_token" \
  "http://127.0.0.1:8443/v1/ingress/services?include_system=true" 2>/dev/null || echo "[]")"
# If the listing is not the expected array of objects, jq fails. Under
# pipefail that would abort the installer silently, so treat it as "no
# existing service" and fall through to the create path.
dashboard_service_id="$(printf '%s' "$dashboard_services_json" \
  | jq -r --arg public_host "$domain" '.[]? | select(.public_host==$public_host) | .id' 2>/dev/null \
  | head -n1 || true)"
if [ -n "$dashboard_service_id" ] && [ "$dashboard_service_id" != "null" ]; then
  if ! curl -fsS --max-time 15 \
    -H "Authorization: Bearer $api_token" \
    -H "Content-Type: application/json" \
    -X PUT \
    -d "$dashboard_service_payload" \
    "http://127.0.0.1:8443/v1/ingress/services/$dashboard_service_id" >/dev/null; then
    echo "  ERROR: Failed to update dashboard ingress service $dashboard_service_id"
    exit 1
  fi
  echo "  Dashboard ingress service updated"
else
  if ! curl -fsS --max-time 15 \
    -H "Authorization: Bearer $api_token" \
    -H "Content-Type: application/json" \
    -d "$dashboard_service_payload" \
    "http://127.0.0.1:8443/v1/ingress/services" >/dev/null; then
    echo "  ERROR: Failed to create dashboard ingress service"
    exit 1
  fi
  echo "  Dashboard ingress service created"
fi
systemctl enable --now rizoma-ingress
echo "  Services started"

# --- Verify ingress edge ---
echo "[7/8] Verifying ingress edge..."
systemctl restart rizoma-ingress

# Probe https://$domain/healthz, pinned to the bind address with --resolve,
# every 2 seconds for up to 60 attempts. curl validates the certificate, so a
# success means a trusted certificate is being served.
echo "  Waiting for ingress TLS..."
ingress_ready=false
for ((i = 1; i <= 60; i++)); do
  if curl -fsS --max-time 5 --resolve "$domain:443:$ingress_resolve_ip" "https://$domain/healthz" >/dev/null 2>&1; then
    echo "  Ingress Let's Encrypt TLS ready (attempt $i/60)"
    ingress_ready=true
    break
  fi
  if (( i % 10 == 0 )); then
    echo "  Still waiting for ingress TLS... (attempt $i/60)"
  fi
  sleep 2
done

if [ "$ingress_ready" != "true" ]; then
  echo "  ERROR: Ingress failed to obtain/serve a trusted Let's Encrypt certificate on $ingress_listen_host:443"
  echo "  --- Last 30 lines of ingress logs ---"
  journalctl -u rizoma-ingress --no-pager -n 30 || true
  echo "  --- Ingress service status ---"
  systemctl status rizoma-ingress --no-pager -l || true
  exit 1
fi

# Non-fatal check: plain HTTP should redirect to the HTTPS URL. Extract the
# first Location header from a HEAD request.
redirect_location="$(
  curl -fsSI --max-time 3 --resolve "$domain:80:$ingress_resolve_ip" "http://$domain/healthz" 2>/dev/null \
    | awk 'tolower($1)=="location:" {print $2}' \
    | tr -d '\r' \
    | head -n1 || true
)"
case "$redirect_location" in
  "https://$domain/healthz")
    echo "  HTTP redirect ready"
    ;;
  *)
    echo "  WARNING: HTTP redirect check did not return https://$domain/healthz"
    ;;
esac

# --- Create supervisor user ---
echo "[8/8] Creating Supervisor account..."

# Wait for the coordinator health endpoint (up to 20 attempts, 2s apart).
# A timeout is not fatal here: the API calls below will report the failure.
coordinator_ready=false
for ((attempt = 1; attempt <= 20; attempt++)); do
  if curl -fsS --max-time 3 "http://127.0.0.1:8443/healthz" >/dev/null 2>&1; then
    coordinator_ready=true
    break
  fi
  sleep 2
done
if [ "$coordinator_ready" != "true" ]; then
  echo "  WARNING: coordinator health check did not succeed; continuing anyway"
fi

# Resolve the id of the role named "supervisor" (case-insensitive), creating
# the role when no such role is listed.
# Both pipelines end in `|| true`: under `set -o pipefail` a failing curl or jq
# would otherwise abort the script inside the assignment, before the explicit
# error message below could be printed.
roles="$(curl -fsS -H "Authorization: Bearer $api_token" "http://127.0.0.1:8443/v1/roles" 2>/dev/null || echo "[]")"
supervisor_role_id="$(printf '%s' "$roles" \
  | jq -r '.[] | select((.name|ascii_downcase)=="supervisor") | .id' 2>/dev/null \
  | head -n1 || true)"
if [ -z "$supervisor_role_id" ]; then
  supervisor_role_id="$(curl -fsS -H "Authorization: Bearer $api_token" -H "Content-Type: application/json" \
    -d '{"name":"Supervisor","description":"Master operator role","permissions":["supervisor","admin","write","read"]}' \
    "http://127.0.0.1:8443/v1/roles" 2>/dev/null | jq -r '.id' || true)"
fi
# jq -r prints the literal string "null" when the response carries no id.
if [ -z "$supervisor_role_id" ] || [ "$supervisor_role_id" = "null" ]; then
  echo "ERROR: failed to resolve Supervisor role"; exit 1
fi

# Create the Supervisor account, or normalize an existing account with the
# same e-mail address.
#
# Reads:  api_token, admin_email, admin_name, admin_password, supervisor_role_id
# Writes: supervisor_pin (empty when an existing account already has a PIN)
#
# All JSON is produced by jq so that quotes, backslashes or other special
# characters in the e-mail, name or password cannot break or alter the
# request body, and the e-mail is passed to jq with --arg so it is treated as
# data rather than as part of the filter program.
users_json="$(curl -fsS -H "Authorization: Bearer $api_token" "http://127.0.0.1:8443/v1/users" 2>/dev/null || echo "[]")"
# `|| true`: with `set -o pipefail` a jq failure would otherwise abort the
# installer silently; an empty result selects the "create" path instead.
existing_user="$(printf '%s' "$users_json" \
  | jq -r --arg email "$admin_email" '.[] | select(.email==$email) | .id' 2>/dev/null \
  | head -n1 || true)"
existing_supervisor_pin_set="$(printf '%s' "$users_json" \
  | jq -r --arg email "$admin_email" '.[] | select(.email==$email) | .supervisor_pin_set // false' 2>/dev/null \
  | head -n1 || true)"
supervisor_pin=""
if [ -z "$existing_user" ]; then
  supervisor_pin="$(generate_supervisor_pin)"
  # The request body goes through a private temporary file so the password
  # never appears on curl's command line. The password and PIN are handed to
  # jq through its environment (not argv) for the same reason.
  user_payload="$(mktemp /tmp/rizoma-user-payload.XXXXXX)"
  chmod 600 "$user_payload"
  if ! RIZOMA_USER_PASSWORD="$admin_password" RIZOMA_USER_PIN="$supervisor_pin" \
    jq -n \
      --arg email "$admin_email" \
      --arg name "$admin_name" \
      --arg role_id "$supervisor_role_id" \
      '{
        email: $email,
        name: $name,
        permissions: ["supervisor", "admin", "write", "read"],
        role_ids: [$role_id],
        team_ids: [],
        disabled: false,
        password: env.RIZOMA_USER_PASSWORD,
        must_change_password: false,
        supervisor_pin: env.RIZOMA_USER_PIN
      }' > "$user_payload"; then
    rm -f "$user_payload"
    echo "ERROR: failed to build Supervisor user payload"
    exit 1
  fi

  if ! create_result="$(curl -fsS -H "Authorization: Bearer $api_token" \
    -H "Content-Type: application/json" \
    -d @"$user_payload" \
    "http://127.0.0.1:8443/v1/users" 2>&1)"; then
    rm -f "$user_payload"
    echo "ERROR: Supervisor user creation failed"
    echo "Response: $create_result"
    exit 1
  fi
  # Prefer a PIN echoed back in the response over the locally generated one.
  response_pin="$(echo "$create_result" | jq -r '.supervisor_pin // empty' 2>/dev/null || true)"
  if [ -n "$response_pin" ]; then
    supervisor_pin="$response_pin"
  fi

  # Remove the payload file as soon as the request has completed
  rm -f "$user_payload"
  log_masked "Supervisor user created for $admin_email"
else
  # Existing account: re-assert permissions and role, and set a PIN only when
  # the account does not report one yet.
  user_payload="$(mktemp /tmp/rizoma-user-payload.XXXXXX)"
  chmod 600 "$user_payload"
  if [ "$existing_supervisor_pin_set" = "true" ]; then
    if ! jq -n \
      --arg role_id "$supervisor_role_id" \
      '{
        permissions: ["supervisor", "admin", "write", "read"],
        role_ids: [$role_id],
        disabled: false
      }' > "$user_payload"; then
      rm -f "$user_payload"
      echo "ERROR: failed to build Supervisor user payload"
      exit 1
    fi
  else
    supervisor_pin="$(generate_supervisor_pin)"
    if ! RIZOMA_USER_PIN="$supervisor_pin" \
      jq -n \
        --arg role_id "$supervisor_role_id" \
        '{
          permissions: ["supervisor", "admin", "write", "read"],
          role_ids: [$role_id],
          disabled: false,
          supervisor_pin: env.RIZOMA_USER_PIN
        }' > "$user_payload"; then
      rm -f "$user_payload"
      echo "ERROR: failed to build Supervisor user payload"
      exit 1
    fi
  fi
  if ! update_result="$(curl -fsS -X PUT -H "Authorization: Bearer $api_token" \
    -H "Content-Type: application/json" \
    -d @"$user_payload" \
    "http://127.0.0.1:8443/v1/users/$existing_user" 2>&1)"; then
    rm -f "$user_payload"
    echo "ERROR: Supervisor user normalization failed"
    echo "Response: $update_result"
    exit 1
  fi
  # Prefer a PIN echoed back in the response over the locally generated one.
  response_pin="$(echo "$update_result" | jq -r '.supervisor_pin // empty' 2>/dev/null || true)"
  if [ -n "$response_pin" ]; then
    supervisor_pin="$response_pin"
  fi
  rm -f "$user_payload"
  log_masked "Supervisor user normalized for $admin_email"
fi

# Re-read the user list and confirm the admin account reports a PIN as set and
# carries the "supervisor" permission; otherwise stop with an error.
users_json="$(curl -fsS -H "Authorization: Bearer $api_token" "http://127.0.0.1:8443/v1/users" 2>/dev/null || echo "[]")"
# The e-mail is passed with --arg so special characters in it cannot break or
# alter the jq filter. `|| true` keeps a jq failure from aborting the script
# under `set -o pipefail` before the explicit error below is printed.
supervisor_ready="$(printf '%s' "$users_json" \
  | jq -r --arg email "$admin_email" \
      '.[] | select(.email==$email) | select((.supervisor_pin_set == true) and ((.permissions // []) | index("supervisor"))) | .id' 2>/dev/null \
  | head -n1 || true)"
if [ -z "$supervisor_ready" ]; then
  echo "ERROR: Supervisor bootstrap verification failed"
  echo "The initial account was created but does not have Supervisor authority and PIN state."
  exit 1
fi

echo "  Supervisor account ready"

# Final installation summary. Non-secret lines go to stdout; the Supervisor
# PIN and API token are emitted through print_secret_line.
printf '%s\n' \
  "" \
  "  ╔══════════════════════════════════════╗" \
  "  ║     Rizoma Platform Installed        ║" \
  "  ╚══════════════════════════════════════╝" \
  "" \
  "  Dashboard:   https://$domain/" \
  "  TLS:         Let's Encrypt via rizoma-ingress" \
  "  ACME cache:  $ingress_acme_cache" \
  "  Supervisor: $admin_email"
# supervisor_pin is empty when an existing account already had a PIN set.
if [ -z "$supervisor_pin" ]; then
  echo "  Supervisor PIN: already set for existing account"
else
  print_secret_line "  Supervisor PIN: $supervisor_pin"
fi
print_secret_line "  API token:   $api_token"
# NOTE(review): the "asked to change your password" reminder below appears to
# conflict with the user-creation payload, which sets must_change_password to
# false; likewise "root only" vs. relay.env being chowned to rizoma-relay.
# Confirm which side is intended.
printf '%s\n' \
  "" \
  "  ╔══════════════════════════════════════════════════╗" \
  "  ║           SECURITY REMINDERS                     ║" \
  "  ╠══════════════════════════════════════════════════╣" \
  "  ║ • Save the API token in a secure password manager║" \
  "  ║ • On first login, you will be asked to change    ║" \
  "  ║   your password                                  ║" \
  "  ║ • Environment files are at /etc/rizoma/*.env     ║" \
  "  ║   with 600 permissions (root only)               ║" \
  "  ║ • PostgreSQL credentials stored in service env   ║" \
  "  ║ • Installer log at: $log_file" \
  "  ║   (review for any sensitive data leaks)          ║" \
  "  ╚══════════════════════════════════════════════════╝" \
  ""

# Closing entries in the installer log
log_masked "Installation complete. API token displayed once - save it securely."
log_masked "Supervisor user ready for: $admin_email"
