From 500f69efccb3050da2d23d321cf8866f461f763c Mon Sep 17 00:00:00 2001 From: Malin Date: Tue, 7 Apr 2026 10:23:48 +0200 Subject: [PATCH] fix: rewrite parser with correct sitespeed.io v39 JSON paths All metric paths were wrong. Corrected based on browsertimeAggregator.js and axeAggregator.js source: - pageTimings (load, FE/BE, DOM) live at statistics.pageTimings.* not statistics.timings.pageTimings.* - FCP is statistics.paintTiming['first-contentful-paint'].median - CLS is statistics.pageinfo.cumulativeLayoutShift.median - TBT/FID/tasks are under statistics.cpu.longTasks.* - Axe data is in axe.pageSummary.json (violations.*.median), not in the browsertime file under accessibility.summary.* - sustainable pageSummary has co2PerPageView/FirstParty/ThirdParty.median (no totalCO2 at per-URL level) Co-Authored-By: Claude Sonnet 4.6 --- parser.js | 187 +++++++++++++++++++++++++++--------------------------- 1 file changed, 95 insertions(+), 92 deletions(-) diff --git a/parser.js b/parser.js index 20ac1ae..63dbff0 100644 --- a/parser.js +++ b/parser.js @@ -2,12 +2,11 @@ import { readdir, readFile } from 'fs/promises'; import { join } from 'path'; /** - * Walk the outputFolder looking for the pageSummary JSON files - * produced by sitespeed.io. The structure is: - * outputFolder/pages///.pageSummary.json + * Recursively find all *.pageSummary.json files under outputFolder. 
+ * sitespeed.io v39 writes them into: + * pages/<domain>/<path>/data/<type>.pageSummary.json */ async function findPageSummaries(outputFolder) { - const pagesDir = join(outputFolder, 'pages'); const summaries = {}; async function walk(dir) { @@ -29,7 +28,7 @@ async function findPageSummaries(outputFolder) { } } - await walk(pagesDir); + await walk(outputFolder); return summaries; } @@ -38,6 +37,7 @@ async function readJson(filePath) { return JSON.parse(raw); } +// Safe deep-get: safe(obj, 'a', 'b', 'c') is equivalent to (obj?.a?.b?.c ?? null) function safe(obj, ...path) { let cur = obj; for (const key of path) { @@ -47,8 +47,9 @@ function safe(obj, ...path) { return cur ?? null; } -function median(obj) { - return safe(obj, 'median') ?? safe(obj, 'mean') ?? null; +// Get the median from a stats object { median, min, max, ... } +function med(obj) { + return safe(obj, 'median'); } export async function parseResults(outputFolder, _url) { @@ -56,119 +57,121 @@ export async function parseResults(outputFolder, _url) { const metrics = {}; // ─── browsertime.pageSummary ─────────────────────────────────────────────── + // All aggregated values live under `statistics.*` + // Key layout (from browsertimeAggregator.js): + // statistics.timings.largestContentfulPaint — LCP renderTime + // statistics.timings.fullyLoaded + // statistics.pageTimings.* — pageLoadTime, backEndTime, etc. + // statistics.paintTiming.* — 'first-contentful-paint', etc. + // statistics.pageinfo.cumulativeLayoutShift — CLS + // statistics.visualMetrics.* — SpeedIndex, FirstVisualChange, etc. + // statistics.cpu.longTasks.* — totalBlockingTime, tasks, etc. 
const btFiles = summaries['browsertime'] || []; if (btFiles.length > 0) { const bt = await readJson(btFiles[0]); - const stats = safe(bt, 'statistics'); - const timings = safe(stats, 'timings'); - const pageTimings = safe(timings, 'pageTimings'); - const userTimings = safe(timings, 'userTimings'); - const visualMetrics = safe(stats, 'visualMetrics'); - const cpu = safe(stats, 'cpu'); - const axe = safe(bt, 'accessibility', 'summary'); + const s = safe(bt, 'statistics'); - // Core Web Vitals / timings - if (timings) { - metrics.ttfb = median(safe(timings, 'timeToFirstByte')); - metrics.fcp = median(safe(timings, 'firstContentfulPaint')); - metrics.lcp = median(safe(timings, 'largestContentfulPaint')); - metrics.cls = median(safe(timings, 'cumulativeLayoutShift')); - metrics.tbt = median(safe(timings, 'totalBlockingTime')); - metrics.max_potential_fid = median(safe(timings, 'maxPotentialFID')); - } + if (s) { + // Core Web Vitals + metrics.lcp = med(safe(s, 'timings', 'largestContentfulPaint')); + metrics.fcp = med(safe(s, 'paintTiming', 'first-contentful-paint')); + metrics.cls = med(safe(s, 'pageinfo', 'cumulativeLayoutShift')); + metrics.tbt = med(safe(s, 'cpu', 'longTasks', 'totalBlockingTime')); + metrics.ttfb = med(safe(s, 'pageTimings', 'backEndTime')); + metrics.max_potential_fid = med(safe(s, 'cpu', 'longTasks', 'maxPotentialFid')); - if (pageTimings) { - metrics.page_load_time = median(safe(pageTimings, 'pageLoadTime')); - metrics.fully_loaded = median(safe(pageTimings, 'fullyLoaded')); - metrics.dom_content_loaded = median(safe(pageTimings, 'domContentLoadedEventEnd')); - metrics.dom_interactive = median(safe(pageTimings, 'domInteractive')); - metrics.front_end_time = median(safe(pageTimings, 'frontEndTime')); - metrics.back_end_time = median(safe(pageTimings, 'backEndTime')); - metrics.time_to_first_byte = median(safe(pageTimings, 'timeToFirstByte')) - ?? 
metrics.ttfb; - } + // Navigation timings (pageTimings keys come from browsertime's pageTimings object) + metrics.page_load_time = med(safe(s, 'pageTimings', 'pageLoadTime')); + metrics.fully_loaded = med(safe(s, 'timings', 'fullyLoaded')); + metrics.dom_content_loaded = med(safe(s, 'pageTimings', 'domContentLoadedTime')); + metrics.dom_interactive = med(safe(s, 'pageTimings', 'domInteractiveTime')); + metrics.front_end_time = med(safe(s, 'pageTimings', 'frontEndTime')); + metrics.back_end_time = med(safe(s, 'pageTimings', 'backEndTime')); + metrics.time_to_first_byte = metrics.ttfb; - if (visualMetrics) { - metrics.speed_index = median(safe(visualMetrics, 'SpeedIndex')); - metrics.first_visual_change = median(safe(visualMetrics, 'FirstVisualChange')); - metrics.last_visual_change = median(safe(visualMetrics, 'LastVisualChange')); - metrics.visual_complete_85 = median(safe(visualMetrics, 'VisualComplete85')); - metrics.perceptual_speed_index = median(safe(visualMetrics, 'PerceptualSpeedIndex')); - } + // Visual metrics (from sitespeed-scroll-server / ffmpeg video analysis) + metrics.speed_index = med(safe(s, 'visualMetrics', 'SpeedIndex')); + metrics.first_visual_change = med(safe(s, 'visualMetrics', 'FirstVisualChange')); + metrics.last_visual_change = med(safe(s, 'visualMetrics', 'LastVisualChange')); + metrics.visual_complete_85 = med(safe(s, 'visualMetrics', 'VisualComplete85')); + metrics.perceptual_speed_index = med(safe(s, 'visualMetrics', 'PerceptualSpeedIndex')); - // CPU / Long Tasks - if (cpu) { - metrics.long_tasks_count = median(safe(cpu, 'longTasks', 'tasks')); - metrics.long_tasks_duration = median(safe(cpu, 'longTasks', 'totalDuration')); - } - - // Axe accessibility - if (axe) { - metrics.axe_critical = safe(axe, 'critical') ?? 0; - metrics.axe_serious = safe(axe, 'serious') ?? 0; - metrics.axe_moderate = safe(axe, 'moderate') ?? 0; - metrics.axe_minor = safe(axe, 'minor') ?? 
0; + // CPU / Long Tasks + metrics.long_tasks_count = med(safe(s, 'cpu', 'longTasks', 'tasks')); + metrics.long_tasks_duration = med(safe(s, 'cpu', 'longTasks', 'totalDuration')); } } - // ─── coach.pageSummary ───────────────────────────────────────────────────── + // ─── axe.pageSummary ────────────────────────────────────────────────────── + // Produced by AxeAggregator.summarizeStats(): + // { violations: { critical: {median,…}, serious, moderate, minor }, … } + const axeFiles = summaries['axe'] || []; + if (axeFiles.length > 0) { + const axe = await readJson(axeFiles[0]); + metrics.axe_critical = med(safe(axe, 'violations', 'critical')) ?? 0; + metrics.axe_serious = med(safe(axe, 'violations', 'serious')) ?? 0; + metrics.axe_moderate = med(safe(axe, 'violations', 'moderate')) ?? 0; + metrics.axe_minor = med(safe(axe, 'violations', 'minor')) ?? 0; + } + + // ─── coach.pageSummary ──────────────────────────────────────────────────── + // Coach sends the median-run's full coach result: { advice: { score, performance, … } } const coachFiles = summaries['coach'] || []; if (coachFiles.length > 0) { const coach = await readJson(coachFiles[0]); - const advice = safe(coach, 'advice'); - if (advice) { - metrics.score_overall = safe(advice, 'score') ?? 
safe(advice, 'overall', 'score'); - metrics.score_performance = safe(advice, 'performance', 'score'); - metrics.score_accessibility = safe(advice, 'accessibility', 'score'); - metrics.score_bestpractice = safe(advice, 'bestpractice', 'score'); - metrics.score_privacy = safe(advice, 'privacy', 'score'); - } + metrics.score_overall = safe(coach, 'advice', 'score'); + metrics.score_performance = safe(coach, 'advice', 'performance', 'score'); + metrics.score_accessibility = safe(coach, 'advice', 'accessibility', 'score'); + metrics.score_bestpractice = safe(coach, 'advice', 'bestpractice', 'score'); + metrics.score_privacy = safe(coach, 'advice', 'privacy', 'score'); } - // ─── pagexray.pageSummary ────────────────────────────────────────────────── + // ─── pagexray.pageSummary ───────────────────────────────────────────────── + // Sent as pageSummary[0] with `.statistics` added by the aggregator. + // Use statistics.*.median when available; fall back to direct property. const xrayFiles = summaries['pagexray'] || []; if (xrayFiles.length > 0) { const xray = await readJson(xrayFiles[0]); - // pagexray has multiple runs — use the first or median-like object - const page = Array.isArray(xray) ? 
xray[0] : xray; - const ct = safe(page, 'contentTypes'); + const xs = safe(xray, 'statistics'); - if (ct) { - metrics.transfer_total = safe(page, 'transferSize'); - metrics.requests_total = safe(page, 'requests'); - metrics.transfer_html = safe(ct, 'html', 'transferSize'); - metrics.transfer_js = safe(ct, 'javascript', 'transferSize'); - metrics.transfer_css = safe(ct, 'css', 'transferSize'); - metrics.transfer_image = safe(ct, 'image', 'transferSize'); - metrics.transfer_font = safe(ct, 'font', 'transferSize'); - metrics.requests_js = safe(ct, 'javascript', 'requests'); - metrics.requests_css = safe(ct, 'css', 'requests'); - metrics.requests_image = safe(ct, 'image', 'requests'); - metrics.requests_font = safe(ct, 'font', 'requests'); + function xv(statPath, directPath) { + // Try statistics.X.median first, then direct property + const fromStats = med(safe(xs, ...statPath)); + if (fromStats !== null) return fromStats; + return safe(xray, ...directPath); } - const tp = safe(page, 'thirdParty'); - if (tp) { - metrics.third_party_requests = safe(tp, 'requests'); - metrics.third_party_transfer = safe(tp, 'transferSize'); - } + metrics.transfer_total = xv(['transferSize'], ['transferSize']); + metrics.requests_total = xv(['requests'], ['requests']); + + metrics.transfer_html = xv(['contentTypes', 'html', 'transferSize'], ['contentTypes', 'html', 'transferSize']); + metrics.transfer_js = xv(['contentTypes', 'javascript', 'transferSize'], ['contentTypes', 'javascript', 'transferSize']); + metrics.transfer_css = xv(['contentTypes', 'css', 'transferSize'], ['contentTypes', 'css', 'transferSize']); + metrics.transfer_image = xv(['contentTypes', 'image', 'transferSize'], ['contentTypes', 'image', 'transferSize']); + metrics.transfer_font = xv(['contentTypes', 'font', 'transferSize'], ['contentTypes', 'font', 'transferSize']); + + metrics.requests_js = xv(['contentTypes', 'javascript', 'requests'], ['contentTypes', 'javascript', 'requests']); + metrics.requests_css = 
xv(['contentTypes', 'css', 'requests'], ['contentTypes', 'css', 'requests']); + metrics.requests_image = xv(['contentTypes', 'image', 'requests'], ['contentTypes', 'image', 'requests']); + metrics.requests_font = xv(['contentTypes', 'font', 'requests'], ['contentTypes', 'font', 'requests']); + + metrics.third_party_transfer = xv(['thirdParty', 'transferSize'], ['thirdParty', 'transferSize']); + metrics.third_party_requests = xv(['thirdParty', 'requests'], ['thirdParty', 'requests']); } - // ─── sustainable.pageSummary ─────────────────────────────────────────────── + // ─── sustainable.pageSummary ────────────────────────────────────────────── + // Aggregator stores per-URL stats: + // { co2PerPageView: {median,…}, co2FirstParty: {median,…}, co2ThirdParty: {median,…} } const sustainFiles = summaries['sustainable'] || []; if (sustainFiles.length > 0) { const sust = await readJson(sustainFiles[0]); - metrics.co2_per_page_view = safe(sust, 'co2PerPageView') - ?? safe(sust, 'statistics', 'co2PerPageView', 'median'); - metrics.co2_total = safe(sust, 'totalCO2') - ?? safe(sust, 'statistics', 'totalCO2', 'median'); - metrics.co2_first_party = safe(sust, 'firstParty', 'co2') - ?? safe(sust, 'statistics', 'firstParty', 'co2', 'median'); - metrics.co2_third_party = safe(sust, 'thirdParty', 'co2') - ?? safe(sust, 'statistics', 'thirdParty', 'co2', 'median'); + metrics.co2_per_page_view = med(safe(sust, 'co2PerPageView')); + metrics.co2_total = med(safe(sust, 'co2PerPageView')); // totalCO2 not in per-URL summary + metrics.co2_first_party = med(safe(sust, 'co2FirstParty')); + metrics.co2_third_party = med(safe(sust, 'co2ThirdParty')); } - // Remove null values to avoid overwriting real DB values with NULL + // Strip nulls so we don't overwrite good DB values with NULL return Object.fromEntries( Object.entries(metrics).filter(([, v]) => v !== null && v !== undefined) );