Improvements to docker scheduling and lock file (#339)

* improved logic that prevents overlapping runs, added logic to kill the process if it is stuck or doesn't exit properly

* Improve lock file logic

Improve lock file logic to release lock if the script didn't finish cleanly on a prior run. Otherwise shouldn't interrupt actual run.

* Tidy up readme for docker
This commit is contained in:
Michael Cammarata
2025-09-10 03:29:28 -04:00
committed by GitHub
parent eea4407454
commit 072e96dd53
3 changed files with 138 additions and 30 deletions

View File

@@ -35,24 +35,16 @@ Under development, however mainly for personal use!
2. **Update `accounts.json`**
3. **Edit `config.json`,** ensuring the following values are set (other settings are up to your preference):
```json
"headless": true,
"clusters": 1,
```
3. **Edit `config.json`,** ensuring "headless": true, other settings are up to your preference
### **Customize the `compose.yaml` File**
A basic docker `compose.yaml` is provided. Follow these steps to configure and run the container:
1. **Set Your Timezone:** Adjust the `TZ` variable to ensure correct scheduling.
2. **Configure Persistent Storage:**
- Map `config.json` and `accounts.json` to retain settings and accounts.
- (Optional) Use a persistent `sessions` folder to save login sessions.
3. **Customize the Schedule:**
- Modify `CRON_SCHEDULE` to set run times. Use [crontab.guru](https://crontab.guru) for help.
- **Note:** The container adds 550 minutes of random variability to each scheduled start time.
- **Note:** The container adds 550 minutes of random variability to each scheduled start time. This can be optionally disabled or customized in the compose file.
4. **(Optional) Run on Startup:**
- Set `RUN_ON_START=true` to execute the script immediately when the container starts.
5. **Start the Container:** Run `docker compose up -d` to build and launch.

View File

@@ -1,5 +1,5 @@
services:
netsky:
microsoft-rewards-script:
build: .
container_name: microsoft-rewards-script
restart: unless-stopped
@@ -14,13 +14,16 @@ services:
TZ: "America/Toronto" # Set your timezone for proper scheduling
NODE_ENV: "production"
CRON_SCHEDULE: "0 7,16,20 * * *" # Customize your schedule, use crontab.guru for formatting
RUN_ON_START: "true" # Runs the script on container startup
RUN_ON_START: "true" # Runs the script immediately on container startup
# Start-time randomization (uncomment to customize or disable)
# Add scheduled start-time randomization (uncomment to customize or disable, default: enabled)
#MIN_SLEEP_MINUTES: "5"
#MAX_SLEEP_MINUTES: "50"
SKIP_RANDOM_SLEEP: "false"
# Optionally set how long to wait before killing a stuck script run (prevents blocking future runs, default: 8 hours)
#STUCK_PROCESS_TIMEOUT_HOURS: "8"
# Optional resource limits for the container
mem_limit: 4g
cpus: 2

View File

@@ -1,42 +1,155 @@
#!/usr/bin/env bash
set -euo pipefail
# Ensure Playwright uses the preinstalled browsers
export PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
# Ensure TZ is set (entrypoint sets TZ system-wide); fallback if missing
export TZ="${TZ:-UTC}"
# Change to project directory
cd /usr/src/microsoft-rewards-script
# Optional: prevent overlapping runs
LOCKFILE=/tmp/run_daily.lock
exec 9>"$LOCKFILE"
if ! flock -n 9; then
echo "[$(date)] [run_daily.sh] Previous instance still running; exiting."
exit 0
# -------------------------------
# Function: Check and fix lockfile integrity
# -------------------------------
self_heal_lockfile() {
# If lockfile exists but is empty → remove it
if [ -f "$LOCKFILE" ]; then
local lock_content
lock_content=$(<"$LOCKFILE" || echo "")
if [[ -z "$lock_content" ]]; then
echo "[$(date)] [run_daily.sh] Found empty lockfile → removing."
rm -f "$LOCKFILE"
return
fi
# If lockfile contains non-numeric PID → remove it
if ! [[ "$lock_content" =~ ^[0-9]+$ ]]; then
echo "[$(date)] [run_daily.sh] Found corrupted lockfile content ('$lock_content') → removing."
rm -f "$LOCKFILE"
return
fi
# If lockfile contains PID but process is dead → remove it
if ! kill -0 "$lock_content" 2>/dev/null; then
echo "[$(date)] [run_daily.sh] Lockfile PID $lock_content is dead → removing stale lock."
rm -f "$LOCKFILE"
return
fi
fi
}
# -------------------------------
# Function: Acquire lock
# -------------------------------
acquire_lock() {
local max_attempts=5
local attempt=0
local timeout_hours=${STUCK_PROCESS_TIMEOUT_HOURS:-8}
local timeout_seconds=$((timeout_hours * 3600))
while [ $attempt -lt $max_attempts ]; do
# Try to create lock with current PID
if (set -C; echo "$$" > "$LOCKFILE") 2>/dev/null; then
echo "[$(date)] [run_daily.sh] Lock acquired successfully (PID: $$)"
return 0
fi
# Lock exists, validate it
if [ -f "$LOCKFILE" ]; then
local existing_pid
existing_pid=$(<"$LOCKFILE" || echo "")
echo "[$(date)] [run_daily.sh] Lock file exists with PID: '$existing_pid'"
# If lockfile content is invalid → delete and retry
if [[ -z "$existing_pid" || ! "$existing_pid" =~ ^[0-9]+$ ]]; then
echo "[$(date)] [run_daily.sh] Removing invalid lockfile → retrying..."
rm -f "$LOCKFILE"
continue
fi
# If process is dead → delete and retry
if ! kill -0 "$existing_pid" 2>/dev/null; then
echo "[$(date)] [run_daily.sh] Removing stale lock (dead PID: $existing_pid)"
rm -f "$LOCKFILE"
continue
fi
# Check process runtime → kill if exceeded timeout
local process_age
if process_age=$(ps -o etimes= -p "$existing_pid" 2>/dev/null | tr -d ' '); then
if [ "$process_age" -gt "$timeout_seconds" ]; then
echo "[$(date)] [run_daily.sh] Killing stuck process $existing_pid (${process_age}s > ${timeout_hours}h)"
kill -TERM "$existing_pid" 2>/dev/null || true
sleep 5
kill -KILL "$existing_pid" 2>/dev/null || true
rm -f "$LOCKFILE"
continue
fi
fi
fi
echo "[$(date)] [run_daily.sh] Lock held by PID $existing_pid, attempt $((attempt + 1))/$max_attempts"
sleep 2
((attempt++))
done
echo "[$(date)] [run_daily.sh] Could not acquire lock after $max_attempts attempts; exiting."
return 1
}
# -------------------------------
# Function: Release lock
# -------------------------------
release_lock() {
if [ -f "$LOCKFILE" ]; then
local lock_pid
lock_pid=$(<"$LOCKFILE")
if [ "$lock_pid" = "$$" ]; then
rm -f "$LOCKFILE"
echo "[$(date)] [run_daily.sh] Lock released (PID: $$)"
fi
fi
}
# Always release lock on exit — but only if we acquired it
trap 'release_lock' EXIT INT TERM
# -------------------------------
# MAIN EXECUTION FLOW
# -------------------------------
echo "[$(date)] [run_daily.sh] Current process PID: $$"
# Self-heal any broken or empty locks before proceeding
self_heal_lockfile
# Attempt to acquire the lock safely
if ! acquire_lock; then
exit 0
fi
# Random sleep between configurable minutes (default 5-50 minutes)
# Random sleep between MIN and MAX to spread execution
MINWAIT=${MIN_SLEEP_MINUTES:-5}
MAXWAIT=${MAX_SLEEP_MINUTES:-50}
MINWAIT_SEC=$((MINWAIT*60))
MAXWAIT_SEC=$((MAXWAIT*60))
# Skip sleep if SKIP_RANDOM_SLEEP is set to true
if [ "${SKIP_RANDOM_SLEEP:-false}" != "true" ]; then
SLEEPTIME=$(( MINWAIT_SEC + RANDOM % (MAXWAIT_SEC - MINWAIT_SEC) ))
SLEEP_MINUTES=$(( SLEEPTIME / 60 ))
echo "[$(date)] [run_daily.sh] Sleeping for $SLEEP_MINUTES minutes ($SLEEPTIME seconds) to randomize execution..."
echo "[$(date)] [run_daily.sh] Sleeping for $((SLEEPTIME/60)) minutes ($SLEEPTIME seconds)"
sleep "$SLEEPTIME"
else
echo "[$(date)] [run_daily.sh] Skipping random sleep (SKIP_RANDOM_SLEEP=true)"
echo "[$(date)] [run_daily.sh] Skipping random sleep"
fi
# Start the actual script
echo "[$(date)] [run_daily.sh] Starting script..."
if npm start; then
echo "[$(date)] [run_daily.sh] Script completed successfully."
echo "[$(date)] [run_daily.sh] Script completed successfully."
else
echo "[$(date)] [run_daily.sh] ERROR: Script failed!" >&2
fi
echo "[$(date)] [run_daily.sh] ERROR: Script failed!" >&2
fi
echo "[$(date)] [run_daily.sh] Script finished"
# Lock is released automatically via trap