feat: add extractBalancedObject utility and tests; enhance dashboard parsing

This commit is contained in:
2025-11-15 14:46:44 +01:00
parent e3596f657b
commit 6be7763698
4 changed files with 226 additions and 46 deletions

View File

@@ -9,6 +9,7 @@ import { Counters, DashboardData, MorePromotion, PromotionalItem } from '../inte
import { EarnablePoints } from '../interface/Points'
import { QuizData } from '../interface/QuizData'
import { waitForElementSmart, waitForPageReady } from '../util/browser/SmartWait'
import { extractBalancedObject } from '../util/core/Utils'
import { saveSessionData } from '../util/state/Load'
@@ -414,48 +415,37 @@ export default class BrowserFunc {
* IMPROVED: Enhanced validation with structure checks
*/
private async parseDashboardFromScript(page: Page, scriptContent: string): Promise<DashboardData | null> {
return await page.evaluate((scriptContent: string) => {
const patterns = [
/var\s+dashboard\s*=\s*(\{[\s\S]*?\});/,
/dashboard\s*=\s*(\{[\s\S]*?\});/,
/var\s+dashboard\s*:\s*(\{[\s\S]*?\})\s*[,;]/
try {
const anchors: (string | RegExp)[] = [
/var\s+dashboard\s*=\s*/,
/dashboard\s*=\s*/,
/var\s+dashboard\s*:\s*/
]
for (const regex of patterns) {
const match = regex.exec(scriptContent)
if (match && match[1]) {
try {
const jsonStr = match[1]
// Validate basic JSON structure before parsing
const trimmed = jsonStr.trim()
if (!trimmed.startsWith('{') || !trimmed.endsWith('}')) {
continue
}
for (const anchor of anchors) {
const objStr = extractBalancedObject(scriptContent, anchor, 1000000)
if (!objStr) continue
const parsed = JSON.parse(jsonStr)
const trimmed = objStr.trim()
if (!trimmed.startsWith('{') || !trimmed.endsWith('}')) continue
// Enhanced validation: check structure and type
if (typeof parsed !== 'object' || parsed === null) {
continue
}
// Validate essential dashboard properties exist
if (!parsed.userStatus || typeof parsed.userStatus !== 'object') {
continue
}
// Successfully validated dashboard structure
return parsed
} catch (e) {
// JSON.parse failed or validation error - try next pattern
continue
}
try {
const parsed = JSON.parse(trimmed)
if (typeof parsed !== 'object' || parsed === null) continue
if (!parsed.userStatus || typeof parsed.userStatus !== 'object') continue
return parsed as DashboardData
} catch {
// Try next anchor
continue
}
}
return null
}, scriptContent)
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error)
this.bot.log(this.bot.isMobile, 'GET-DASHBOARD-DATA', `Dashboard parse error: ${errorMessage}`, 'error')
return null
}
}
/**
@@ -639,19 +629,22 @@ export default class BrowserFunc {
}
if (scriptContent && foundVariable) {
// Escape dots in variable name for regex
const escapedVar = foundVariable.replace(/\./g, '\\.')
const regex = new RegExp(`${escapedVar}\\s*=\\s*({.*?});`, 's')
const match = regex.exec(scriptContent)
if (match && match[1]) {
const quizData = JSON.parse(match[1])
this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Found quiz data using variable: ${foundVariable}`, 'log')
return quizData
} else {
this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Variable ${foundVariable} found but could not extract JSON data`, 'error')
throw new Error(`Quiz data variable ${foundVariable} found but JSON extraction failed`)
const anchor = new RegExp(foundVariable.replace(/\./g, '\\.') + "\\s*=\\s*")
const objStr = extractBalancedObject(scriptContent, anchor, 500000)
if (objStr) {
try {
const quizData = JSON.parse(objStr)
this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Found quiz data using variable: ${foundVariable}`, 'log')
return quizData
} catch (e) {
const msg = e instanceof Error ? e.message : String(e)
this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Quiz JSON parse failed for ${foundVariable}: ${msg}`, 'error')
throw new Error(`Quiz data JSON parse failed: ${msg}`)
}
}
this.bot.log(this.bot.isMobile, 'GET-QUIZ-DATA', `Variable ${foundVariable} found but could not extract JSON data`, 'error')
throw new Error(`Quiz data variable ${foundVariable} found but JSON extraction failed`)
} else {
// Log available scripts for debugging
const allScripts = $('script')

View File

@@ -224,4 +224,117 @@ export function normalizeRecoveryEmail(recoveryEmail: unknown): string | undefin
const trimmed = recoveryEmail.trim()
return trimmed === '' ? undefined : trimmed
}
/**
 * Run a regex replacement over a string again and again until a pass changes nothing.
 * Removing one match can splice the surrounding text into a brand-new match, so a single
 * global pass is not always enough for sanitization.
 *
 * IMPORTANT: Keep the pattern safe and bounded; every pass rescans the whole string, so an
 * overly broad pattern can still be expensive.
 *
 * @param input Source string
 * @param pattern Regular expression to apply; it is copied, and the copy gets the global flag if it is missing
 * @param replacement Replacement string or replacer function, passed to String.prototype.replace
 * @param maxPasses Upper bound on the number of passes (default 1000); when reached, the latest result is returned as-is
 * @returns The string after the first pass that left it unchanged, or after the last allowed pass
 * @throws Error if pattern is not a RegExp instance
 */
export function replaceUntilStable(
    input: string,
    pattern: RegExp,
    replacement: string | ((substring: string, ...args: any[]) => string),
    maxPasses: number = 1000
): string {
    if (!(pattern instanceof RegExp)) {
        throw new Error('pattern must be a RegExp')
    }

    // Work on a private copy so every pass replaces all occurrences
    const alreadyGlobal = pattern.flags.includes('g')
    const everyMatch = new RegExp(pattern.source, alreadyGlobal ? pattern.flags : pattern.flags + 'g')

    // One full replacement pass; the branch only exists to pick the matching overload
    const runPass = (value: string): string =>
        typeof replacement === 'string'
            ? value.replace(everyMatch, replacement)
            : value.replace(everyMatch, replacement)

    let current = input
    let pass = 0
    while (pass < maxPasses) {
        const replaced = runPass(current)
        if (replaced === current) {
            // Fixed point reached
            break
        }
        current = replaced
        pass += 1
    }
    return current
}
/**
 * Extract the text of a brace-balanced object literal that follows an anchor.
 *
 * The anchor is located first (first occurrence of the string, or first match of the RegExp
 * searching from the start of `text`). The scan then begins at the first '{' found anywhere
 * after the anchor and walks forward once, counting brace depth. Braces inside string
 * literals delimited by ", ' or ` are ignored, and backslash escapes inside those strings
 * are honoured. No regular expression is used for the body, so there is no backtracking.
 *
 * Limitations: comments and regex literals are not recognised, and `${...}` inside a
 * template literal is treated as plain string content.
 *
 * @param text Full source text to scan
 * @param anchor String or RegExp marking where the assignment occurs. For a RegExp, the
 *               `g` and `y` flags are ignored so repeated calls with the same regex object
 *               give the same result and the caller's `lastIndex` is never modified.
 * @param maxScan Maximum characters to scan from the first '{' (prevents excessive work on malformed inputs)
 * @returns Object text including outer braces, or null if the anchor or '{' is missing,
 *          the braces are imbalanced, the limit is exceeded, or any error occurs
 */
export function extractBalancedObject(text: string, anchor: string | RegExp, maxScan: number = 500000): string | null {
    try {
        let startIdx: number
        if (typeof anchor === 'string') {
            const pos = text.indexOf(anchor)
            if (pos === -1) return null
            startIdx = pos + anchor.length
        } else {
            // exec() on a global/sticky regex resumes from lastIndex and updates it, which made
            // a second call with the same anchor object miss the match. Use a stateless copy.
            const matcher = anchor.global || anchor.sticky
                ? new RegExp(anchor.source, anchor.flags.replace(/[gy]/g, ''))
                : anchor
            const match = matcher.exec(text)
            if (!match) return null
            startIdx = match.index + match[0].length
        }

        // Find the first '{' after the anchor
        const braceStart = text.indexOf('{', startIdx)
        if (braceStart === -1) return null

        let depth = 0
        // Quote character of the string literal currently being skipped, or null outside strings
        let quote: string | null = null
        // True when the previous character inside a string was an unescaped backslash
        let escaped = false
        const endLimit = Math.min(text.length, braceStart + maxScan)

        for (let i = braceStart; i < endLimit; i++) {
            const ch = text[i]

            if (quote !== null) {
                if (escaped) {
                    // This character is escaped; it can neither close the string nor start an escape
                    escaped = false
                } else if (ch === '\\') {
                    escaped = true
                } else if (ch === quote) {
                    quote = null
                }
                continue
            }

            // Not inside a string
            if (ch === '"' || ch === "'" || ch === '`') {
                quote = ch
                escaped = false
                continue
            }

            if (ch === '{') {
                depth++
            } else if (ch === '}') {
                depth--
                if (depth === 0) {
                    return text.slice(braceStart, i + 1)
                }
            }
        }

        // Loop ended without closing the outer brace: imbalanced input or scan limit reached
        return null
    } catch {
        // Deliberate best-effort: any unexpected failure is reported as "not found"
        return null
    }
}

View File

@@ -0,0 +1,47 @@
import assert from 'node:assert/strict'
import test from 'node:test'
import { extractBalancedObject } from '../src/util/core/Utils'
/** Build a script-like snippet: `before` + object text + terminator. */
const wrap = (before: string, obj: string, after = ';') => `${before}${obj}${after}`

test('extractBalancedObject extracts simple object after string anchor', () => {
    const obj = '{"a":1,"b":2}'
    const text = wrap('var dashboard = ', obj)
    const out = extractBalancedObject(text, 'var dashboard = ')
    assert.equal(out, obj)
})

test('extractBalancedObject extracts with regex anchor and whitespace', () => {
    const obj = '{"x": {"y": 3}}'
    const text = wrap('dashboard = ', obj)
    const out = extractBalancedObject(text, /dashboard\s*=\s*/)
    assert.equal(out, obj)
})

test('extractBalancedObject handles nested braces and strings safely', () => {
    // Braces inside string values must not affect depth counting
    const obj = '{"t":"{ not a brace }","n": {"inner": {"v": "} in string"}}}'
    const text = wrap('var dashboard = ', obj)
    const out = extractBalancedObject(text, 'var dashboard = ')
    assert.equal(out, obj)
})

test('extractBalancedObject handles escaped quotes inside strings', () => {
    // The escaped quotes must not terminate the string early
    const obj = '{"s":"\\"quoted\\" braces { }","k":1}'
    const text = wrap('dashboard = ', obj)
    const out = extractBalancedObject(text, 'dashboard = ')
    assert.equal(out, obj)
})

test('extractBalancedObject returns null when anchor missing', () => {
    const text = 'no object here'
    const out = extractBalancedObject(text, 'var dashboard = ')
    assert.equal(out, null)
})

test('extractBalancedObject returns null on imbalanced braces or limit', () => {
    const start = 'var dashboard = '
    const text = `${start}{"a": {"b": 1}` // missing final brace
    const out = extractBalancedObject(text, start)
    assert.equal(out, null)
})

test('extractBalancedObject returns null when the object exceeds maxScan', () => {
    const start = 'var dashboard = '
    const obj = '{"a":{"b":1}}'
    const text = wrap(start, obj)
    // A scan window one character short of the object must give up, not return a partial object
    assert.equal(extractBalancedObject(text, start, obj.length - 1), null)
    // A window that exactly covers the object is sufficient
    assert.equal(extractBalancedObject(text, start, obj.length), obj)
})

27
tests/sanitize.test.ts Normal file
View File

@@ -0,0 +1,27 @@
import assert from 'node:assert/strict'
import test from 'node:test'
import { replaceUntilStable } from '../src/util/core/Utils'
test('remove HTML comments with repeated replacement', () => {
    const input = '<!<!--- comment --->>'
    const out = replaceUntilStable(input, /<!--|--!?>/g, '')
    assert.equal(out.includes('<!--'), false)
    assert.equal(out.includes('-->'), false)
    // Remaining string should not contain full HTML comment delimiters
    assert.equal(/<!--|-->/g.test(out), false)

    // Removing the inner delimiters here splices new '<!--' and '-->' together,
    // so this input only comes out clean if a second pass actually runs
    const nested = '<!<!---- comment ---->>'
    assert.equal(replaceUntilStable(nested, /<!--|--!?>/g, ''), ' comment ')
})

test('path traversal: repeated removal of ../ sequences', () => {
    const input = '/./.././'
    const out = replaceUntilStable(input, /\.\.\//, '')
    assert.equal(out.includes('..'), false)

    // '....//' becomes '../' after one pass; a second pass is needed to remove it
    assert.equal(replaceUntilStable('....//', /\.\.\//, ''), '')
})

test('enforces global flag if missing', () => {
    const input = 'a<script>b</script>c<script>d</script>'
    // remove tag brackets to neutralize tags (illustrative only)
    const out = replaceUntilStable(input, /<|>/, '')
    assert.equal(out.includes('<'), false)
    assert.equal(out.includes('>'), false)
})