#!/usr/bin/env bash
#
# Deploy a staged copy of the catalogsync package and restart the service.
#
# Flow: back up the current package directory, copy the staging directory
# into place, stop and relaunch the service through bin/serve_console.sh,
# poll the health URL, and verify exactly one serve process is running.
# If a post-restart check fails, the previous package is restored from the
# backup and the service is restarted again. Old backups are pruned only
# after a successful deploy.
#
# Requires config/catalogsync.env (loaded via load_env_file from
# load_env.sh) to provide WEB_PORT and LOG_DIR.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APP_HOME="$(cd "${SCRIPT_DIR}/.." && pwd)"
CONFIG_FILE="${APP_HOME}/config/catalogsync.env"
RUN_DIR="${APP_HOME}/run"
DEPLOY_DIR="${APP_HOME}/deploy"
LOCK_DIR="${RUN_DIR}/deploy.lock"
PID_FILE="${RUN_DIR}/serve.pid"
TARGET_DIR="${APP_HOME}/app/musicdl/catalogsync"
DEFAULT_STAGING_DIR="${APP_HOME}/deploy/staging/catalogsync"
BACKUP_ROOT="${APP_HOME}/deploy/backups"

STAGING_DIR="${DEFAULT_STAGING_DIR}"
HEALTH_URL=""
HEALTH_RETRIES=45
HEALTH_INTERVAL_SECONDS=1
# Upper bound for a single health probe so one hung connection cannot
# stall the whole deploy.
HEALTH_REQUEST_TIMEOUT_SECONDS=10
KEEP_BACKUPS=5
SKIP_HEALTH_CHECK=0
BACKUP_DIR=""
HAS_BACKUP=0

# shellcheck source=./load_env.sh
source "${SCRIPT_DIR}/load_env.sh"

# log/fail are used throughout this script but are not defined in it.
# NOTE(review): presumably load_env.sh provides them - confirm. The
# fallbacks below are only installed when no such function exists, so an
# existing implementation always wins.
if ! declare -F log >/dev/null; then
  # Print a timestamped progress message.
  log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
  }
fi

if ! declare -F fail >/dev/null; then
  # Print an error message to stderr and abort the script with status 1.
  fail() {
    printf '[%s] ERROR: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
    exit 1
  }
fi

# Print the command-line help to stderr and exit.
# Arguments: $1 - exit status (optional, default 1)
usage() {
  local exit_code="${1:-1}"
  cat >&2 <<EOF
Usage: ${0##*/} [options]

Deploy the staged catalogsync package, restart the service, and roll back
to the previous package if the new version fails its checks.

Options:
  --staging-dir DIR        Directory to deploy
                           (default: ${DEFAULT_STAGING_DIR})
  --health-url URL         URL polled after restart
                           (default: http://127.0.0.1:<WEB_PORT>/dashboard)
  --health-retries N       Maximum number of health probes (default: 45)
  --health-interval-sec N  Seconds to sleep between probes (default: 1)
  --keep-backups N         Backups kept after a successful deploy (default: 5)
  --skip-health-check      Do not poll the health URL
  -h, --help               Show this help
EOF
  exit "${exit_code}"
}

# Abort unless the named variable is set and non-empty.
# Arguments: $1 - variable name (looked up indirectly)
require_var() {
  local var_name="$1"
  if [[ -z "${!var_name:-}" ]]; then
    fail "Missing required config variable: ${var_name} (from ${CONFIG_FILE})"
  fi
}

# Abort unless the value is a decimal integer >= 1.
# Leading zeros are rejected: bash arithmetic would read them as octal
# (and error out on e.g. "08"), so such values are not safe to use later.
# Arguments: $1 - value to check, $2 - name used in the error message
validate_positive_integer() {
  local value="$1"
  local name="$2"
  if ! [[ "${value}" =~ ^[1-9][0-9]*$ ]]; then
    fail "${name} must be a positive integer: ${value}"
  fi
}

# Abort with a clear message when a value-taking option is the last word
# on the command line. Without this, `shift 2` fails and `set -e` ends the
# script silently.
# Arguments: $1 - option name, $2 - number of remaining arguments ($#)
require_option_value() {
  local option="$1"
  local remaining="$2"
  if (( remaining < 2 )); then
    fail "Option ${option} requires a value"
  fi
}

# Take the deploy lock (a directory, because mkdir is atomic) and record
# our PID in it. A lock whose recorded owner is no longer alive, or that
# has no readable owner, is treated as stale and replaced.
acquire_deploy_lock() {
  mkdir -p -- "${RUN_DIR}"
  if mkdir -- "${LOCK_DIR}" 2>/dev/null; then
    printf '%s\n' "$$" >"${LOCK_DIR}/owner_pid"
    return 0
  fi

  local owner_pid=""
  if [[ -f "${LOCK_DIR}/owner_pid" ]]; then
    owner_pid="$(cat "${LOCK_DIR}/owner_pid" 2>/dev/null || true)"
  fi

  if [[ -n "${owner_pid}" ]] && kill -0 "${owner_pid}" 2>/dev/null; then
    fail "Another deploy is running (owner_pid=${owner_pid})"
  fi

  # Stale lock: remove it and try once more.
  rm -rf -- "${LOCK_DIR}"
  if ! mkdir -- "${LOCK_DIR}" 2>/dev/null; then
    fail "Cannot acquire deploy lock: ${LOCK_DIR}"
  fi
  printf '%s\n' "$$" >"${LOCK_DIR}/owner_pid"
}

# Release the deploy lock. Installed as the EXIT trap after acquisition.
cleanup_lock() {
  rm -rf -- "${LOCK_DIR}"
}

# Print the PID (second `ps -ef` column) of every process whose `ps -ef`
# line contains the given fixed string. Lines containing "grep" are
# dropped to exclude the filter itself. Always returns 0, even when
# nothing matches.
# Arguments: $1 - fixed string to look for
list_matching_processes() {
  local pattern="$1"
  ps -ef | grep -F -- "${pattern}" | grep -v grep | awk '{print $2}' || true
}

# Print how many processes match the given fixed string (0 if none).
# Arguments: $1 - fixed string to look for
count_matching_processes() {
  local pattern="$1"
  list_matching_processes "${pattern}" | awk 'NF {count++} END {print count+0}'
}

# Send a signal to every process matching the given fixed string.
# Failures to signal (e.g. the process already exited) are ignored.
# Arguments: $1 - signal name without the dash (e.g. TERM), $2 - pattern
kill_matching_processes() {
  local signal_name="$1"
  local pattern="$2"
  local pid
  while IFS= read -r pid; do
    if [[ -n "${pid}" ]]; then
      kill "-${signal_name}" "${pid}" 2>/dev/null || true
    fi
  done < <(list_matching_processes "${pattern}")
}

# Stop the service: first the process recorded in the PID file (TERM, up
# to 20 s grace, then KILL), then any remaining serve/wrapper processes
# found by command line (TERM for up to 10 rounds, then KILL).
stop_service() {
  local pid=""
  local attempt

  if [[ -f "${PID_FILE}" ]]; then
    pid="$(cat "${PID_FILE}" 2>/dev/null || true)"
    pid="${pid//[[:space:]]/}"
  fi

  # Only a positive integer is a safe argument for kill: 0 and negative
  # values address whole process groups (or every process).
  if [[ -n "${pid}" ]] && ! [[ "${pid}" =~ ^[1-9][0-9]*$ ]]; then
    log "Ignoring invalid PID file content in ${PID_FILE}: ${pid}"
    pid=""
  fi

  if [[ -n "${pid}" ]] && kill -0 "${pid}" 2>/dev/null; then
    log "Stopping running service from PID file (pid=${pid})"
    kill -TERM "${pid}" 2>/dev/null || true
    for (( attempt = 0; attempt < 20; attempt++ )); do
      if ! kill -0 "${pid}" 2>/dev/null; then
        break
      fi
      sleep 1
    done
    if kill -0 "${pid}" 2>/dev/null; then
      log "Service still alive; force killing pid=${pid}"
      kill -KILL "${pid}" 2>/dev/null || true
    fi
  fi
  rm -f -- "${PID_FILE}"

  local serve_pattern="musicdl.catalogsync.cli serve"
  local wrapper_pattern="${APP_HOME}/bin/serve_console.sh"
  local serve_count
  local wrapper_count
  for (( attempt = 0; attempt < 10; attempt++ )); do
    serve_count="$(count_matching_processes "${serve_pattern}")"
    wrapper_count="$(count_matching_processes "${wrapper_pattern}")"
    if [[ "${serve_count}" == "0" && "${wrapper_count}" == "0" ]]; then
      break
    fi
    kill_matching_processes "TERM" "${serve_pattern}"
    kill_matching_processes "TERM" "${wrapper_pattern}"
    sleep 1
  done

  # Final sweep; a no-op when nothing matches any more.
  kill_matching_processes "KILL" "${serve_pattern}"
  kill_matching_processes "KILL" "${wrapper_pattern}"
}

# Launch bin/serve_console.sh in the background under nohup, with its
# output captured in a timestamped launch log inside LOG_DIR.
start_service() {
  local launch_log
  launch_log="${LOG_DIR}/serve_console_launch_$(date +%Y%m%d_%H%M%S).log"
  nohup bash "${APP_HOME}/bin/serve_console.sh" >"${launch_log}" 2>&1 &
  local launcher_pid=$!
  log "Started service launcher pid=${launcher_pid}, launch_log=${launch_log}"
}

# Replace TARGET_DIR with a copy of STAGING_DIR, moving the current
# TARGET_DIR to a timestamped backup first.
# Globals:  BACKUP_DIR and HAS_BACKUP are written.
# Returns:  0 on success, 1 if a filesystem step failed. Aborts via fail
#           when the staging directory is missing or invalid.
# Every filesystem step is checked explicitly: this function is called
# from an `if` condition, where `set -e` does not apply.
sync_catalogsync() {
  if [[ ! -d "${STAGING_DIR}" ]]; then
    fail "Staging directory not found: ${STAGING_DIR}"
  fi
  if [[ ! -f "${STAGING_DIR}/__init__.py" ]]; then
    fail "Invalid staging directory (missing __init__.py): ${STAGING_DIR}"
  fi

  if ! mkdir -p -- "${BACKUP_ROOT}" "$(dirname -- "${TARGET_DIR}")"; then
    log "Failed to create ${BACKUP_ROOT} or the parent of ${TARGET_DIR}"
    return 1
  fi
  BACKUP_DIR="${BACKUP_ROOT}/catalogsync_$(date +%Y%m%d_%H%M%S)"

  if [[ -d "${TARGET_DIR}" ]]; then
    if ! mv -- "${TARGET_DIR}" "${BACKUP_DIR}"; then
      log "Failed to back up ${TARGET_DIR} to ${BACKUP_DIR}"
      return 1
    fi
    HAS_BACKUP=1
    log "Backed up current catalogsync to ${BACKUP_DIR}"
  fi

  if ! cp -a -- "${STAGING_DIR}" "${TARGET_DIR}"; then
    log "Failed to copy ${STAGING_DIR} to ${TARGET_DIR}"
    # Do not leave a partial copy (or nothing at all) in place.
    rm -rf -- "${TARGET_DIR:?}"
    if (( HAS_BACKUP == 1 )); then
      if mv -- "${BACKUP_DIR}" "${TARGET_DIR}"; then
        HAS_BACKUP=0
        log "Restored previous catalogsync to ${TARGET_DIR}"
      else
        log "Could not restore ${BACKUP_DIR} to ${TARGET_DIR}"
      fi
    fi
    return 1
  fi
  log "Synced new catalogsync from ${STAGING_DIR} -> ${TARGET_DIR}"
}

# Poll HEALTH_URL until it answers HTTP 200.
# Returns: 0 on HTTP 200 or when the check is skipped, 1 once
#          HEALTH_RETRIES probes have failed. Aborts if curl is missing.
wait_health() {
  if (( SKIP_HEALTH_CHECK == 1 )); then
    log "Health check skipped by --skip-health-check"
    return 0
  fi

  if ! command -v curl >/dev/null 2>&1; then
    fail "curl is required for health check"
  fi

  log "Health checking: ${HEALTH_URL}"
  local attempt
  local code
  for (( attempt = 1; attempt <= HEALTH_RETRIES; attempt++ )); do
    # A failed probe is expected while the service starts, hence `|| true`.
    code="$(curl -s -o /dev/null -w '%{http_code}' \
      --max-time "${HEALTH_REQUEST_TIMEOUT_SECONDS}" \
      "${HEALTH_URL}" || true)"
    if [[ "${code}" == "200" ]]; then
      log "Health check passed (HTTP 200)"
      return 0
    fi
    sleep "${HEALTH_INTERVAL_SECONDS}"
  done

  log "Health check failed: ${HEALTH_URL}"
  return 1
}

# Check that exactly one "musicdl.catalogsync.cli serve" process exists.
# Returns: 0 if the count is 1, otherwise 1.
verify_single_instance() {
  local serve_count
  serve_count="$(count_matching_processes 'musicdl.catalogsync.cli serve')"
  if [[ "${serve_count}" != "1" ]]; then
    log "Unexpected serve process count: ${serve_count}"
    return 1
  fi
  log "Single-instance check passed (serve_count=${serve_count})"
  return 0
}

# Stop the service, put the backup taken by sync_catalogsync back in
# place, restart, and re-run the health and single-instance checks.
# Returns: 0 if the restored version passes both checks, otherwise 1.
# Steps are checked explicitly because callers invoke this from an `if`
# condition, where `set -e` does not apply.
rollback() {
  log "Starting rollback..."
  stop_service

  if (( HAS_BACKUP == 0 )) || [[ ! -d "${BACKUP_DIR}" ]]; then
    log "No backup available; rollback skipped"
    return 1
  fi

  if ! rm -rf -- "${TARGET_DIR:?}"; then
    log "Failed to remove ${TARGET_DIR}; rollback aborted"
    return 1
  fi
  if ! mv -- "${BACKUP_DIR}" "${TARGET_DIR}"; then
    log "Failed to restore ${BACKUP_DIR} to ${TARGET_DIR}"
    return 1
  fi
  HAS_BACKUP=0
  log "Restored backup to ${TARGET_DIR}"

  start_service
  if ! wait_health; then
    log "Rollback service failed health check"
    return 1
  fi
  verify_single_instance
}

# Delete all but the newest KEEP_BACKUPS entries under BACKUP_ROOT.
# Age is taken from the timestamp embedded in the name
# (catalogsync_YYYYMMDD_HHMMSS), which the glob returns in ascending
# order. Directory mtimes are not used: mv and cp -a preserve them, so
# they do not reflect when a backup was taken.
prune_backups() {
  if (( KEEP_BACKUPS < 1 )); then
    return 0
  fi
  if [[ ! -d "${BACKUP_ROOT}" ]]; then
    return 0
  fi

  local -a backups=()
  local candidate
  for candidate in "${BACKUP_ROOT}"/catalogsync_*; do
    # Skips the literal pattern that is left behind when nothing matches.
    if [[ -e "${candidate}" ]]; then
      backups+=("${candidate}")
    fi
  done

  local excess=$(( ${#backups[@]} - KEEP_BACKUPS ))
  if (( excess <= 0 )); then
    return 0
  fi

  local old_backup
  for old_backup in "${backups[@]:0:excess}"; do
    rm -rf -- "${old_backup}"
    log "Pruned old backup: ${old_backup}"
  done
}

# ---------------------------------------------------------------------
# Command line
# ---------------------------------------------------------------------
while [[ $# -gt 0 ]]; do
  case "$1" in
    --staging-dir)
      require_option_value "$1" "$#"
      STAGING_DIR="$2"
      shift 2
      ;;
    --health-url)
      require_option_value "$1" "$#"
      HEALTH_URL="$2"
      shift 2
      ;;
    --health-retries)
      require_option_value "$1" "$#"
      HEALTH_RETRIES="$2"
      shift 2
      ;;
    --health-interval-sec)
      require_option_value "$1" "$#"
      HEALTH_INTERVAL_SECONDS="$2"
      shift 2
      ;;
    --keep-backups)
      require_option_value "$1" "$#"
      KEEP_BACKUPS="$2"
      shift 2
      ;;
    --skip-health-check)
      SKIP_HEALTH_CHECK=1
      shift
      ;;
    -h | --help)
      usage 0
      ;;
    *)
      fail "Unknown argument: $1"
      ;;
  esac
done

validate_positive_integer "${HEALTH_RETRIES}" "HEALTH_RETRIES"
validate_positive_integer "${HEALTH_INTERVAL_SECONDS}" "HEALTH_INTERVAL_SECONDS"
validate_positive_integer "${KEEP_BACKUPS}" "KEEP_BACKUPS"

# ---------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------
if [[ -f "${CONFIG_FILE}" ]]; then
  load_env_file "${CONFIG_FILE}"
else
  fail "Config file not found: ${CONFIG_FILE}. Copy catalogsync.env.example to catalogsync.env first."
fi

for required_var in WEB_PORT LOG_DIR; do
  require_var "${required_var}"
done

if [[ -z "${HEALTH_URL}" ]]; then
  HEALTH_URL="http://127.0.0.1:${WEB_PORT}/dashboard"
fi

# Detect a missing curl before anything is changed, instead of after the
# new code is already in place and the service has been restarted.
if (( SKIP_HEALTH_CHECK == 0 )) && ! command -v curl >/dev/null 2>&1; then
  fail "curl is required for health check"
fi

mkdir -p -- "${DEPLOY_DIR}" "${RUN_DIR}" "${LOG_DIR}" "${BACKUP_ROOT}" "${APP_HOME}/app/musicdl"

# ---------------------------------------------------------------------
# Deploy
# ---------------------------------------------------------------------
acquire_deploy_lock
# The lock is released from the EXIT trap only. INT/TERM must terminate
# the script (which then fires EXIT); a handler that merely cleaned up
# would drop the lock and let the deploy continue unprotected.
trap cleanup_lock EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

LOG_FILE="${LOG_DIR}/deploy_and_restart_$(date +%Y%m%d_%H%M%S).log"
# From here on, mirror stdout and stderr into the deploy log.
exec > >(tee -a "${LOG_FILE}") 2>&1

log "Starting deploy. staging=${STAGING_DIR}"
log "Deploy log: ${LOG_FILE}"

if ! sync_catalogsync; then
  fail "Sync step failed"
fi

stop_service
start_service

if ! wait_health; then
  log "New version failed health check; attempting rollback."
  if rollback; then
    fail "Deploy failed; rollback succeeded."
  fi
  fail "Deploy failed; rollback failed."
fi

if ! verify_single_instance; then
  log "Single-instance check failed; attempting rollback."
  if rollback; then
    fail "Deploy failed by single-instance check; rollback succeeded."
  fi
  fail "Deploy failed by single-instance check; rollback failed."
fi

prune_backups
log "Deploy succeeded."