From 6be77636987e66c8f3876994b1fbcd859aae9433 Mon Sep 17 00:00:00 2001 From: LightZirconite Date: Sat, 15 Nov 2025 14:46:44 +0100 Subject: [PATCH] feat: add extractBalancedObject utility and tests; enhance dashboard parsing --- src/browser/BrowserFunc.ts | 85 ++++++++++----------- src/util/core/Utils.ts | 113 ++++++++++++++++++++++++++++ tests/extractBalancedObject.test.ts | 47 ++++++++++++ tests/sanitize.test.ts | 27 +++++++ 4 files changed, 226 insertions(+), 46 deletions(-) create mode 100644 tests/extractBalancedObject.test.ts create mode 100644 tests/sanitize.test.ts diff --git a/src/browser/BrowserFunc.ts b/src/browser/BrowserFunc.ts index df2dbf8..468b76d 100644 --- a/src/browser/BrowserFunc.ts +++ b/src/browser/BrowserFunc.ts @@ -9,6 +9,7 @@ import { Counters, DashboardData, MorePromotion, PromotionalItem } from '../inte import { EarnablePoints } from '../interface/Points' import { QuizData } from '../interface/QuizData' import { waitForElementSmart, waitForPageReady } from '../util/browser/SmartWait' +import { extractBalancedObject } from '../util/core/Utils' import { saveSessionData } from '../util/state/Load' @@ -414,48 +415,37 @@ export default class BrowserFunc { * IMPROVED: Enhanced validation with structure checks */ private async parseDashboardFromScript(page: Page, scriptContent: string): Promise { - return await page.evaluate((scriptContent: string) => { - const patterns = [ - /var\s+dashboard\s*=\s*(\{[\s\S]*?\});/, - /dashboard\s*=\s*(\{[\s\S]*?\});/, - /var\s+dashboard\s*:\s*(\{[\s\S]*?\})\s*[,;]/ + try { + const anchors: (string | RegExp)[] = [ + /var\s+dashboard\s*=\s*/, + /dashboard\s*=\s*/, + /var\s+dashboard\s*:\s*/ ] - for (const regex of patterns) { - const match = regex.exec(scriptContent) - if (match && match[1]) { - try { - const jsonStr = match[1] - // Validate basic JSON structure before parsing - const trimmed = jsonStr.trim() - if (!trimmed.startsWith('{') || !trimmed.endsWith('}')) { - continue - } + for (const anchor of anchors) { + const objStr = extractBalancedObject(scriptContent, anchor, 1000000) + if (!objStr) continue - const parsed = JSON.parse(jsonStr) + const trimmed = objStr.trim() + if (!trimmed.startsWith('{') || !trimmed.endsWith('}')) continue - // Enhanced validation: check structure and type - if (typeof parsed !== 'object' || parsed === null) { - continue - } - - // Validate essential dashboard properties exist - if (!parsed.userStatus || typeof parsed.userStatus !== 'object') { - continue - } - - // Successfully validated dashboard structure - return parsed - } catch (e) { - // JSON.parse failed or validation error - try next pattern - continue - } + try { + const parsed = JSON.parse(trimmed) + if (typeof parsed !== 'object' || parsed === null) continue + if (!parsed.userStatus || typeof parsed.userStatus !== 'object') continue + return parsed as DashboardData + } catch { + // Try next anchor + continue } } return null - - }, scriptContent) + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + this.bot.log(this.bot.isMobile, 'GET-DASHBOARD-DATA', `Dashboard parse error: ${errorMessage}`, 'error') + return null + } } /** @@ -639,19 +629,22 @@ export default class BrowserFunc { } if (scriptContent && foundVariable) { - // Escape dots in variable name for regex - const escapedVar = foundVariable.replace(/\./g, '\\.') - const regex = new RegExp(`${escapedVar}\\s*=\\s*({.*?});`, 's') - const match = regex.exec(scriptContent) - - if (match && match[1]) { - const quizData = JSON.parse(match[1]) - this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Found quiz data using variable: ${foundVariable}`, 'log') - return quizData - } else { - this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Variable ${foundVariable} found but could not extract JSON data`, 'error') - throw new Error(`Quiz data variable ${foundVariable} found but JSON extraction failed`) + const anchor = new RegExp(foundVariable.replace(/\./g, '\\.') + "\\s*=\\s*") + const objStr = extractBalancedObject(scriptContent, anchor, 500000) + if (objStr) { + try { + const quizData = JSON.parse(objStr) + this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Found quiz data using variable: ${foundVariable}`, 'log') + return quizData + } catch (e) { + const msg = e instanceof Error ? e.message : String(e) + this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Quiz JSON parse failed for ${foundVariable}: ${msg}`, 'error') + throw new Error(`Quiz data JSON parse failed: ${msg}`) + } } + + this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Variable ${foundVariable} found but could not extract JSON data`, 'error') + throw new Error(`Quiz data variable ${foundVariable} found but JSON extraction failed`) } else { // Log available scripts for debugging const allScripts = $('script') diff --git a/src/util/core/Utils.ts b/src/util/core/Utils.ts index 0d94485..db93e3f 100644 --- a/src/util/core/Utils.ts +++ b/src/util/core/Utils.ts @@ -224,4 +224,117 @@ export function normalizeRecoveryEmail(recoveryEmail: unknown): string | undefin const trimmed = recoveryEmail.trim() return trimmed === '' ? undefined : trimmed +} + +/** + * Apply a global regex replacement repeatedly until the string stops changing. + * Ensures effective sanitization when single-pass replacement can reveal new matches. + * + * IMPORTANT: Provide a safe, bounded pattern. A too-broad pattern can still be expensive. + * + * @param input Source string + * @param pattern Regular expression to apply (global flag enforced) + * @param replacement Replacement string or function + * @param maxPasses Safety cap to prevent infinite loops (default 1000) + * @returns Final stabilized string + */ +export function replaceUntilStable( + input: string, + pattern: RegExp, + replacement: string | ((substring: string, ...args: any[]) => string), + maxPasses: number = 1000 +): string { + if (!(pattern instanceof RegExp)) { + throw new Error('pattern must be a RegExp') + } + + // Ensure global flag to replace all occurrences each pass + const flags = pattern.flags.includes('g') ? pattern.flags : pattern.flags + 'g' + const globalPattern = new RegExp(pattern.source, flags) + + let previous = input + for (let i = 0; i < maxPasses; i++) { + const next = previous.replace(globalPattern, replacement as any) + if (next === previous) return next + previous = next + } + return previous +} + +/** + * Safely extract a balanced JavaScript/JSON object starting at the first '{' after an anchor. + * Linear-time scan with brace depth counting and string handling to avoid catastrophic backtracking. + * + * @param text Full source text to scan + * @param anchor String or RegExp indicating where the assignment occurs (scan starts at first '{' after anchor) + * @param maxScan Maximum characters to scan from the first '{' (prevents excessive work on malformed inputs) + * @returns Object text including outer braces, or null if not found/imbalanced/exceeded limits + */ +export function extractBalancedObject(text: string, anchor: string | RegExp, maxScan: number = 500000): string | null { + try { + let startIdx = -1 + + if (typeof anchor === 'string') { + const pos = text.indexOf(anchor) + if (pos === -1) return null + startIdx = pos + anchor.length + } else { + const match = anchor.exec(text) + if (!match || match.index == null) return null + startIdx = match.index + match[0].length + } + + // Find the first '{' after the anchor + const braceStart = text.indexOf('{', startIdx) + if (braceStart === -1) return null + + let depth = 0 + let inString = false + let stringQuote: '"' | "'" | '`' | null = null + let escaped = false + + const endLimit = Math.min(text.length, braceStart + maxScan) + + for (let i = braceStart; i < endLimit; i++) { + const ch = text[i] + + if (inString) { + if (escaped) { + escaped = false + continue + } + if (ch === '\\') { + escaped = true + continue + } + if (ch === stringQuote) { + inString = false + stringQuote = null + } + continue + } + + // Not inside a string + if (ch === '"' || ch === "'" || ch === '`') { + inString = true + stringQuote = ch as '"' | "'" | '`' + escaped = false + continue + } + + if (ch === '{') { + depth++ + } else if (ch === '}') { + depth-- + if (depth === 0) { + return text.slice(braceStart, i + 1) + } + } + } + + // If we exit the loop without returning, either imbalanced or exceeded limit + return null + } catch { + return null + } } \ No newline at end of file diff --git a/tests/extractBalancedObject.test.ts b/tests/extractBalancedObject.test.ts new file mode 100644 index 0000000..4a00c6a --- /dev/null +++ b/tests/extractBalancedObject.test.ts @@ -0,0 +1,47 @@ +import assert from 'node:assert/strict' +import test from 'node:test' + +import { extractBalancedObject } from '../src/util/core/Utils' + +const wrap = (before: string, obj: string, after = ';') => `${before}${obj}${after}` + +test('extractBalancedObject extracts simple object after string anchor', () => { + const obj = '{"a":1,"b":2}' + const text = wrap('var dashboard = ', obj) + const out = extractBalancedObject(text, 'var dashboard = ') + assert.equal(out, obj) +}) + +test('extractBalancedObject extracts with regex anchor and whitespace', () => { + const obj = '{"x": {"y": 3}}' + const text = wrap('dashboard = ', obj) + const out = extractBalancedObject(text, /dashboard\s*=\s*/) + assert.equal(out, obj) +}) + +test('extractBalancedObject handles nested braces and strings safely', () => { + const obj = '{"t":"{ not a brace }","n": {"inner": {"v": "} in string"}}}' + const text = wrap('var dashboard = ', obj) + const out = extractBalancedObject(text, 'var dashboard = ') + assert.equal(out, obj) +}) + +test('extractBalancedObject handles escaped quotes inside strings', () => { + const obj = '{"s":"\\"quoted\\" braces { }","k":1}' + const text = wrap('dashboard = ', obj) + const out = extractBalancedObject(text, 'dashboard = ') + assert.equal(out, obj) +}) + +test('extractBalancedObject returns null when anchor missing', () => { + const text = 'no object here' + const out = extractBalancedObject(text, 'var dashboard = ') + assert.equal(out, null) +}) + +test('extractBalancedObject returns null on imbalanced braces or limit', () => { + const start = 'var dashboard = ' + const text = `${start}{"a": {"b": 1}` // missing final brace + const out = extractBalancedObject(text, start) + assert.equal(out, null) +}) diff --git a/tests/sanitize.test.ts b/tests/sanitize.test.ts new file mode 100644 index 0000000..a40a9ba --- /dev/null +++ b/tests/sanitize.test.ts @@ -0,0 +1,27 @@ +import assert from 'node:assert/strict' +import test from 'node:test' + +import { replaceUntilStable } from '../src/util/core/Utils' + +test('remove HTML comments with repeated replacement', () => { + const input = '>' + const out = replaceUntilStable(input, /'), false) + // Remaining string should not contain full HTML comment delimiters + assert.equal(//g.test(out), false) +}) + +test('path traversal: repeated removal of ../ sequences', () => { + const input = '/./.././' + const out = replaceUntilStable(input, /\.\.\//, '') + assert.equal(out.includes('..'), false) +}) + +test('enforces global flag if missing', () => { + const input = 'ac' + // remove tag brackets to neutralize tags (illustrative only) + const out = replaceUntilStable(input, /<|>/, '') + assert.equal(out.includes('<'), false) + assert.equal(out.includes('>'), false) +})