From f51daf06d6d5b5efaf15da9fd438600f06ffe152 Mon Sep 17 00:00:00 2001 From: Michael Cammarata <55518507+mgrimace@users.noreply.github.com> Date: Thu, 17 Jul 2025 06:16:22 -0400 Subject: [PATCH] Docker rewrite and optimizations (#321) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Optimizations of dockerfile Massive test optimizations with drop in image size to about 256mb from about 1.2 gb. Drawback is that I currently have to keep the dockerfile playwright version matched to the package.json version * further optimizations Removed redundant (hopefully) sessions directory creation during build * Fix docker cron dependencies Small fix that should make cron run properly * Major docker update! - **Dockerfile rewritten as a multi-stage build** - Split into a “builder” stage (`node:18-slim`) to install dependencies and compile TypeScript, and a “runtime” stage (official Playwright image) to run the script. - This keeps build tools and dependencies out of the final image, making it smaller, faster to pull, and more secure. - **Entrypoint script (`entrypoint.sh`)** - Introduced an entrypoint that runs inside the container at startup to: 1. Set the container’s timezone (`TZ`) correctly, based on the environment or defaulting to UTC. 2. Validate that the user provided a `CRON_SCHEDULE` (exiting early with an error if missing). 3. Optionally perform an initial run of the script immediately (when `RUN_ON_START=true`), without any random sleep. - Centralizing setup in an entrypoint keeps the Dockerfile simpler and ensures proper signal handling. - **`run_daily.sh` improvements** - Removed custom browser-path override so Playwright uses bundled browsers in the official image. - Added a lock using `flock` to prevent overlapping runs if a previous run is still in progress. - Retained the random sleep between 5 and 50 minutes before each run. - Logs are timestamped and clearly report success or failure. 
- **Cron template tweaks** - Updated `src/crontab.template` so that each job line redirects both stdout and stderr into Docker’s stdout (`>> /proc/1/fd/1 2>&1`), making it easy to view logs via `docker logs`. - **Initial-run logic** - The entrypoint checks `RUN_ON_START=true` and, if set, runs `src/run_daily.sh` in the background with `SKIP_RANDOM_SLEEP=true` (i.e. without the random sleep). This provides an immediate first execution on container startup. - Scheduled runs via cron still go through the normal `run_daily.sh` (with sleep and locking). - **Cron logging and visibility** - By redirecting cron job output to the container’s stdout, all logs (initial run and scheduled runs) appear in `docker logs`, avoiding the need to tail log files manually. - **Error handling and validation** - The entrypoint exits early if `CRON_SCHEDULE` is missing, preventing silent misconfiguration. - If the initial run fails, the error is logged and cron still starts, so future scheduled runs can proceed. - `run_daily.sh` will exit early if a previous run is still active (locking), avoiding overlapping executions. 
* Docker (multi-stage) improvements - added cron logging in entrypoint and fixed timezone support for cron-invoked script runs - further optimized multi-stage dockerfile - bumped playwright version to 1.52.0 in dockerfile and package.json - added customization and enable/disable randomization for cron start times - optionally add container health monitor and resource limits in compose.yaml --- Dockerfile | 115 +++++++++++++++++++++++++------------------ compose.yaml | 42 ++++++++++++---- entrypoint.sh | 50 +++++++++++++++++++ package.json | 4 +- src/crontab.template | 3 +- src/run_daily.sh | 58 +++++++++++++--------- 6 files changed, 189 insertions(+), 83 deletions(-) create mode 100755 entrypoint.sh mode change 100644 => 100755 src/run_daily.sh diff --git a/Dockerfile b/Dockerfile index 16bd899..c92d736 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,47 +1,68 @@ -# Use an official Node.js runtime as a base image -FROM node:18 - -# Set the working directory in the container -WORKDIR /usr/src/microsoft-rewards-script - -# Install necessary dependencies for Playwright and cron -RUN apt-get update && apt-get install -y \ - jq \ - cron \ - gettext-base \ - xvfb \ - libgbm-dev \ - libnss3 \ - libasound2 \ - libxss1 \ - libatk-bridge2.0-0 \ - libgtk-3-0 \ - tzdata \ - wget \ - && rm -rf /var/lib/apt/lists/* - -# Copy all files to the working directory -COPY . . 
- -# Install dependencies, set permissions, and build the script -RUN npm install && \ - chmod -R 755 /usr/src/microsoft-rewards-script/node_modules && \ - npm run pre-build && \ - npm run build - -# Copy cron file to cron directory -COPY src/crontab.template /etc/cron.d/microsoft-rewards-cron.template - -# Create the log file to be able to run tail -RUN touch /var/log/cron.log - -# Define the command to run your application with cron optionally -CMD ["sh", "-c", "echo \"$TZ\" > /etc/timezone && \ - ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \ - dpkg-reconfigure -f noninteractive tzdata && \ - envsubst < /etc/cron.d/microsoft-rewards-cron.template > /etc/cron.d/microsoft-rewards-cron && \ - chmod 0644 /etc/cron.d/microsoft-rewards-cron && \ - crontab /etc/cron.d/microsoft-rewards-cron && \ - cron -f & \ - ([ \"$RUN_ON_START\" = \"true\" ] && npm start) && \ - tail -f /var/log/cron.log"] \ No newline at end of file +############################################################################### +# Stage 1: Builder (compile TypeScript) +############################################################################### +FROM node:18-slim AS builder + +WORKDIR /usr/src/microsoft-rewards-script + +# Install minimal tooling if needed +RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Copy package manifests +COPY package*.json ./ + +# Conditional install: npm ci if lockfile exists, else npm install +RUN if [ -f package-lock.json ]; then \ + npm ci; \ + else \ + npm install; \ + fi + +# Copy source code +COPY . . 
+ +# Build TypeScript +RUN npm run build + +############################################################################### +# Stage 2: Runtime (Playwright image) +############################################################################### +FROM mcr.microsoft.com/playwright:v1.52.0-jammy + +WORKDIR /usr/src/microsoft-rewards-script + +# Install cron, gettext-base (for envsubst), tzdata noninteractively +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + cron gettext-base tzdata \ + && rm -rf /var/lib/apt/lists/* + +# Ensure Playwright uses preinstalled browsers +ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright + +# Copy package files first for better caching +COPY --from=builder /usr/src/microsoft-rewards-script/package*.json ./ + +# Install only production dependencies, with fallback +RUN if [ -f package-lock.json ]; then \ + npm ci --omit=dev --ignore-scripts; \ + else \ + npm install --production --ignore-scripts; \ + fi + +# Copy built application +COPY --from=builder /usr/src/microsoft-rewards-script/dist ./dist + +# Copy runtime scripts with proper permissions from the start +COPY --chmod=755 src/run_daily.sh ./src/run_daily.sh +COPY --chmod=644 src/crontab.template /etc/cron.d/microsoft-rewards-cron.template +COPY --chmod=755 entrypoint.sh /usr/local/bin/entrypoint.sh + +# Default TZ (overridden by user via environment) +ENV TZ=UTC + +# Entrypoint handles TZ, initial run toggle, cron templating & launch +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD ["sh", "-c", "echo 'Container started; cron is running.'"] diff --git a/compose.yaml b/compose.yaml index a4fbab0..e52f2b3 100644 --- a/compose.yaml +++ b/compose.yaml @@ -1,15 +1,39 @@ services: - microsoft-rewards-script: + netsky: build: . container_name: microsoft-rewards-script restart: unless-stopped + + # Volume mounts: Specify a location where you want to save the files on your local machine. 
volumes: - ### Replace "/path/to/" with the actual path to where you want to save the files on your local machine. - - /path/to/accounts.json:/usr/src/microsoft-rewards-script/dist/accounts.json - - /path/to/config.json:/usr/src/microsoft-rewards-script/dist/config.json - - /path/to/sessions:/usr/src/microsoft-rewards-script/dist/browser/sessions # Optional, saves your login session + - ./src/accounts.json:/usr/src/microsoft-rewards-script/dist/accounts.json:ro + - ./src/config.json:/usr/src/microsoft-rewards-script/dist/config.json:ro + - ./sessions:/usr/src/microsoft-rewards-script/dist/browser/sessions # Optional, saves your login session + environment: - - NODE_ENV=production - - CRON_SCHEDULE=0 7,15,20 * * * # Customize your schedule, use crontab.guru for formatting - - RUN_ON_START=true # Runs the script on container startup - - TZ=America/Toronto # Set your timezone for proper scheduling \ No newline at end of file + TZ: "America/Toronto" # Set your timezone for proper scheduling + NODE_ENV: "production" + CRON_SCHEDULE: "0 7,16,20 * * *" # Customize your schedule, use crontab.guru for formatting + RUN_ON_START: "true" # Runs the script on container startup + + # Start-time randomization (uncomment to customize or disable) + #MIN_SLEEP_MINUTES: "5" + #MAX_SLEEP_MINUTES: "50" + SKIP_RANDOM_SLEEP: "false" + + # Optional resource limits for the container + mem_limit: 4g + cpus: 2 + + # Health check - monitors if cron daemon is running to ensure scheduled jobs can execute + # Container marked unhealthy if cron process dies + healthcheck: + test: ["CMD", "sh", "-c", "pgrep cron > /dev/null || exit 1"] + interval: 60s + timeout: 10s + retries: 3 + start_period: 30s + + # Security hardening + security_opt: + - no-new-privileges:true \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100755 index 0000000..0a4e46b --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Ensure Playwright uses 
preinstalled browsers +export PLAYWRIGHT_BROWSERS_PATH=/ms-playwright + +# 1. Timezone: default to UTC if not provided +: "${TZ:=UTC}" +ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime +echo "$TZ" > /etc/timezone +dpkg-reconfigure -f noninteractive tzdata + +# 2. Validate CRON_SCHEDULE +if [ -z "${CRON_SCHEDULE:-}" ]; then + echo "ERROR: CRON_SCHEDULE environment variable is not set." >&2 + echo "Please set CRON_SCHEDULE (e.g., \"0 2 * * *\")." >&2 + exit 1 +fi + +# 3. Initial run without sleep if RUN_ON_START=true +if [ "${RUN_ON_START:-false}" = "true" ]; then + echo "[entrypoint] Starting initial run in background at $(date)" + ( + cd /usr/src/microsoft-rewards-script || { + echo "[entrypoint-bg] ERROR: Unable to cd to /usr/src/microsoft-rewards-script" >&2 + exit 1 + } + # Skip random sleep for initial run, but preserve setting for cron jobs + SKIP_RANDOM_SLEEP=true src/run_daily.sh + echo "[entrypoint-bg] Initial run completed at $(date)" + ) & + echo "[entrypoint] Background process started (PID: $!)" +fi + +# 4. Template and register cron file with explicit timezone export +if [ ! -f /etc/cron.d/microsoft-rewards-cron.template ]; then + echo "ERROR: Cron template /etc/cron.d/microsoft-rewards-cron.template not found." >&2 + exit 1 +fi + +# Export TZ for envsubst to use +export TZ +envsubst < /etc/cron.d/microsoft-rewards-cron.template > /etc/cron.d/microsoft-rewards-cron +chmod 0644 /etc/cron.d/microsoft-rewards-cron +crontab /etc/cron.d/microsoft-rewards-cron + +echo "[entrypoint] Cron configured with schedule: $CRON_SCHEDULE and timezone: $TZ; starting cron at $(date)" + +# 5. 
Start cron in foreground (PID 1) +exec cron -f \ No newline at end of file diff --git a/package.json b/package.json index bae5f97..ae24199 100644 --- a/package.json +++ b/package.json @@ -43,8 +43,8 @@ "http-proxy-agent": "^7.0.2", "https-proxy-agent": "^7.0.6", "ms": "^2.1.3", - "playwright": "1.47.2", - "rebrowser-playwright": "1.47.2", + "playwright": "1.52.0", + "rebrowser-playwright": "1.52.0", "socks-proxy-agent": "^8.0.5", "ts-node": "^10.9.2" } diff --git a/src/crontab.template b/src/crontab.template index e913807..5576966 100644 --- a/src/crontab.template +++ b/src/crontab.template @@ -1 +1,2 @@ -${CRON_SCHEDULE} TZ=${TZ} /bin/bash /usr/src/microsoft-rewards-script/src/run_daily.sh >> /proc/1/fd/1 2>> /proc/1/fd/2 +# Run automation according to CRON_SCHEDULE; redirect both stdout & stderr to Docker logs. Sleep settings are passed inline because cron does not hand the container environment to its jobs. +${CRON_SCHEDULE} TZ=${TZ} MIN_SLEEP_MINUTES=${MIN_SLEEP_MINUTES} MAX_SLEEP_MINUTES=${MAX_SLEEP_MINUTES} SKIP_RANDOM_SLEEP=${SKIP_RANDOM_SLEEP} /bin/bash /usr/src/microsoft-rewards-script/src/run_daily.sh >> /proc/1/fd/1 2>&1 diff --git a/src/run_daily.sh b/src/run_daily.sh old mode 100644 new mode 100755 index 31656fd..90d4b90 --- a/src/run_daily.sh +++ b/src/run_daily.sh @@ -1,32 +1,42 @@ -#!/bin/bash +#!/usr/bin/env bash +set -euo pipefail -# Set up environment variables -export PATH=$PATH:/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin +# Ensure Playwright uses the preinstalled browsers +export PLAYWRIGHT_BROWSERS_PATH=/ms-playwright -# Ensure TZ is set -export TZ=${TZ} +# Ensure TZ is set (entrypoint sets TZ system-wide); fallback if missing +export TZ="${TZ:-UTC}" -# Change directory to the application directory +# Change to project directory cd /usr/src/microsoft-rewards-script -# Define the minimum and maximum wait times in seconds -MINWAIT=$((5*60)) # 5 minutes -MAXWAIT=$((50*60)) # 50 minutes +# Optional: prevent overlapping runs +LOCKFILE=/tmp/run_daily.lock +exec 9>"$LOCKFILE" +if ! flock -n 9; then + echo "[$(date)] [run_daily.sh] Previous instance still running; exiting." 
+ exit 0 +fi -# Calculate a random sleep time within the specified range -SLEEPTIME=$((MINWAIT + RANDOM % (MAXWAIT - MINWAIT))) +# Random sleep between configurable minutes (default 5-50 minutes, bounds inclusive; MIN == MAX yields a fixed delay) +MINWAIT=${MIN_SLEEP_MINUTES:-5} +MAXWAIT=${MAX_SLEEP_MINUTES:-50} +MINWAIT_SEC=$((MINWAIT*60)) +MAXWAIT_SEC=$((MAXWAIT*60)) -# Convert the sleep time to minutes for logging -SLEEP_MINUTES=$((SLEEPTIME / 60)) +# Skip sleep if SKIP_RANDOM_SLEEP is set to true; the "+ 1" below keeps the modulus non-zero when MIN == MAX +if [ "${SKIP_RANDOM_SLEEP:-false}" != "true" ]; then + SLEEPTIME=$(( MINWAIT_SEC + RANDOM % (MAXWAIT_SEC - MINWAIT_SEC + 1) )) + SLEEP_MINUTES=$(( SLEEPTIME / 60 )) + echo "[$(date)] [run_daily.sh] Sleeping for $SLEEP_MINUTES minutes ($SLEEPTIME seconds) to randomize execution..." + sleep "$SLEEPTIME" +else + echo "[$(date)] [run_daily.sh] Skipping random sleep (SKIP_RANDOM_SLEEP=true)" +fi -# Log the sleep duration -echo "Sleeping for $SLEEP_MINUTES minutes ($SLEEPTIME seconds)..." - -# Sleep for the calculated time -sleep $SLEEPTIME - -# Log the start of the script -echo "Starting script..." - -# Execute the Node.js script directly -npm run start +echo "[$(date)] [run_daily.sh] Starting script..." +if npm start; then + echo "[$(date)] [run_daily.sh] Script completed successfully." +else + echo "[$(date)] [run_daily.sh] ERROR: Script failed!" >&2 +fi \ No newline at end of file