2495 lines
72 KiB
JavaScript
Raw Normal View History

2025-06-26 15:58:29 +02:00
// Enhanced Browser Automation Content Script with Anti-Detection
// Emitted once when this script is evaluated; useful as a marker in the page
// console that the content script has been loaded.
console.log("OpenDia enhanced content script loaded");
2025-06-13 23:21:32 +02:00
2025-06-26 15:58:29 +02:00
// Enhanced Pattern Database with Twitter-First Priority
2025-06-25 19:07:09 +02:00
/**
 * Intent-driven selector catalogue, keyed by category and then by action
 * (e.g. `ENHANCED_PATTERNS.auth.login`).
 *
 * Each action maps element roles (`input`, `submit`, `textarea`, ...) to an
 * ordered list of CSS selectors, plus a numeric `confidence` for the pattern.
 * Order matters: consumers in this file walk each list front to back and keep
 * the first visible match per role, so the most specific selectors come first.
 */
const ENHANCED_PATTERNS = {
  // Authentication patterns
  auth: {
    login: {
      input: [
        "[type='email']",
        "[name*='username' i]",
        "[placeholder*='email' i]",
        "[name*='login' i]",
      ],
      password: ["[type='password']", "[name*='password' i]"],
      submit: [
        "[type='submit']",
        "button[form]",
        ".login-btn",
        "[aria-label*='login' i]",
      ],
      confidence: 0.9,
    },
    signup: {
      input: [
        "[name*='register' i]",
        "[placeholder*='signup' i]",
        "[name*='email' i]",
      ],
      submit: ["[href*='signup']", ".signup-btn", "[aria-label*='register' i]"],
      confidence: 0.85,
    },
  },

  // Content creation patterns - Twitter FIRST
  content: {
    post_create: {
      textarea: [
        "[data-testid='tweetTextarea_0']", // Twitter FIRST (most specific)
        "[aria-label='Post text']", // Twitter specific
        "[contenteditable='true']", // Generic; tried before the placeholder/data-text fallbacks below
        "textarea[placeholder*='post' i]",
        "[data-text='true']",
      ],
      submit: [
        "[data-testid='tweetButtonInline']", // Twitter inline
        "[data-testid='tweetButton']", // Twitter main
        ".post-btn",
        ".publish-btn",
        "[aria-label*='post' i]",
      ],
      confidence: 0.95,
    },
    comment: {
      textarea: [
        "textarea[placeholder*='comment' i]",
        "[role='textbox']",
        "[placeholder*='reply' i]",
      ],
      submit: [
        ".comment-btn",
        "[aria-label*='comment' i]",
        "[aria-label*='reply' i]",
      ],
      confidence: 0.8,
    },
  },

  // Search patterns
  search: {
    global: {
      input: [
        "[data-testid='SearchBox_Search_Input']", // Twitter search first
        "[type='search']",
        "[role='searchbox']",
        "[placeholder*='search' i]",
        "[name*='search' i]",
      ],
      submit: [
        "[aria-label*='search' i]",
        ".search-btn",
        "button[type='submit']",
      ],
      confidence: 0.85,
    },
  },

  // Navigation patterns
  nav: {
    menu: {
      toggle: [
        "[aria-label*='menu' i]",
        ".menu-btn",
        ".hamburger",
        "[data-toggle='menu']",
      ],
      items: ["nav a", ".nav-item", "[role='menuitem']"],
      confidence: 0.8,
    },
  },

  // Form patterns
  form: {
    submit: {
      button: [
        "[type='submit']",
        "button[form]",
        ".submit-btn",
        "[aria-label*='submit' i]",
      ],
      confidence: 0.85,
    },
    reset: {
      button: ["[type='reset']", ".reset-btn", "[aria-label*='reset' i]"],
      confidence: 0.8,
    },
  },
};
// Anti-Detection Platform Configuration
/**
 * Per-domain configuration for sites whose composer needs a dedicated fill
 * routine instead of the standard one.
 *
 * Keys are registrable domains. Each entry provides:
 *   - `selectors.textarea` / `selectors.submit`: CSS selectors (a
 *     comma-separated list is allowed) for the composer and its submit button.
 *   - `bypassMethod`: name of the fill strategy to dispatch to.
 */
const ANTI_DETECTION_PLATFORMS = {
  "twitter.com": {
    selectors: {
      textarea: "[data-testid='tweetTextarea_0']",
      submit: "[data-testid='tweetButtonInline'], [data-testid='tweetButton']",
    },
    bypassMethod: "twitter_direct",
  },
  "x.com": {
    selectors: {
      textarea: "[data-testid='tweetTextarea_0']",
      submit: "[data-testid='tweetButtonInline'], [data-testid='tweetButton']",
    },
    bypassMethod: "twitter_direct",
  },
  // Add other platforms that need special handling
  "linkedin.com": {
    selectors: {
      textarea: "[contenteditable='true'][role='textbox']",
      submit: "[data-control-name='share.post']",
    },
    bypassMethod: "linkedin_direct",
  },
  "facebook.com": {
    selectors: {
      textarea: "[contenteditable='true'][data-text='true']",
      submit: "[data-testid='react-composer-post-button']",
    },
    bypassMethod: "facebook_direct",
  },
};
// Legacy pattern database for backward compatibility
/**
 * Legacy, site-keyed selector database.
 *
 * Site entries (`twitter`, `github`) list the `domains` they apply to and a
 * `patterns` map keyed by the exact intent string; each pattern maps a role to
 * a single selector string plus a `confidence`. The `universal` entry has a
 * different shape: intent -> `{ selectors: string[], confidence }`.
 */
const PATTERN_DATABASE = {
  twitter: {
    domains: ["twitter.com", "x.com"],
    patterns: {
      post_tweet: {
        textarea:
          "[data-testid='tweetTextarea_0'], [contenteditable='true'][data-text='true']",
        submit:
          "[data-testid='tweetButtonInline'], [data-testid='tweetButton']",
        confidence: 0.95,
      },
      search: {
        input:
          "[data-testid='SearchBox_Search_Input'], input[placeholder*='search' i]",
        submit: "[data-testid='SearchBox_Search_Button']",
        confidence: 0.9,
      },
    },
  },
  github: {
    domains: ["github.com"],
    patterns: {
      search: {
        input: "input[placeholder*='Search' i].form-control",
        submit: "button[type='submit']",
        confidence: 0.85,
      },
    },
  },
  universal: {
    search: {
      selectors: [
        "input[type='search']",
        "input[placeholder*='search' i]",
        "[role='searchbox']",
        "input[name*='search' i]",
      ],
      confidence: 0.6,
    },
    submit: {
      selectors: [
        "button[type='submit']:not([disabled])",
        "input[type='submit']:not([disabled])",
        "[role='button'][aria-label*='submit' i]",
      ],
      confidence: 0.65,
    },
  },
};
class BrowserAutomation {
// Creates the two element registries and their ID counters, then installs the
// runtime message listener and the viewport visibility observer.
constructor() {
this.elementRegistry = new Map();
this.quickRegistry = new Map(); // For phase 1 quick matches
this.idCounter = 0;
this.quickIdCounter = 0;
// The listener is registered before the observer is created.
this.setupMessageListener();
this.setupViewportAnalyzer();
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
setupViewportAnalyzer() {
this.visibilityMap = new Map();
2025-06-26 15:58:29 +02:00
this.observer = new IntersectionObserver(
(entries) => {
entries.forEach((entry) => {
this.visibilityMap.set(entry.target, {
visible: entry.isIntersecting,
ratio: entry.intersectionRatio,
});
2025-06-25 19:07:09 +02:00
});
2025-06-26 15:58:29 +02:00
},
{ threshold: [0, 0.1, 0.5, 1.0] }
);
2025-06-25 19:07:09 +02:00
}
setupMessageListener() {
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
2025-06-26 15:58:29 +02:00
this.handleMessage(message)
.then(sendResponse)
.catch((error) => {
sendResponse({
success: false,
error: error.message,
stack: error.stack,
});
2025-06-25 19:07:09 +02:00
});
return true; // Keep message channel open for async response
});
}
async handleMessage(message) {
const { action, data } = message;
const startTime = performance.now();
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
try {
let result;
switch (action) {
2025-06-26 15:58:29 +02:00
case "analyze":
2025-06-25 19:07:09 +02:00
result = await this.analyzePage(data);
break;
2025-06-26 15:58:29 +02:00
case "extract_content":
2025-06-25 19:07:09 +02:00
result = await this.extractContent(data);
break;
2025-06-26 15:58:29 +02:00
case "element_click":
2025-06-25 19:07:09 +02:00
result = await this.clickElement(data);
break;
2025-06-26 15:58:29 +02:00
case "element_fill":
// 🎯 CRITICAL: Anti-Detection Bypass Implementation
result = await this.fillElementWithAntiDetection(data);
2025-06-25 19:07:09 +02:00
break;
2025-06-26 15:58:29 +02:00
case "wait_for":
2025-06-25 19:07:09 +02:00
result = await this.waitForCondition(data);
break;
2025-06-26 15:58:29 +02:00
case "get_element_state":
2025-06-25 19:07:09 +02:00
const element = this.getElementById(data.element_id);
if (!element) {
throw new Error(`Element not found: ${data.element_id}`);
}
result = {
element_id: data.element_id,
element_name: this.getElementName(element),
state: this.getElementState(element),
2025-06-26 15:58:29 +02:00
current_value: this.getElementValue(element),
2025-06-25 19:07:09 +02:00
};
break;
2025-06-27 15:01:01 +02:00
case "get_page_links":
result = await this.getPageLinks(data);
break;
case "page_scroll":
result = await this.scrollPage(data);
break;
2025-06-25 19:07:09 +02:00
default:
throw new Error(`Unknown action: ${action}`);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const executionTime = performance.now() - startTime;
const dataSize = new Blob([JSON.stringify(result)]).size;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return {
success: true,
data: result,
execution_time: Math.round(executionTime),
data_size: dataSize,
2025-06-26 15:58:29 +02:00
timestamp: Date.now(),
2025-06-25 19:07:09 +02:00
};
} catch (error) {
return {
success: false,
error: error.message,
stack: error.stack,
2025-06-26 15:58:29 +02:00
execution_time: Math.round(performance.now() - startTime),
2025-06-25 19:07:09 +02:00
};
}
}
2025-06-26 15:58:29 +02:00
// 🎯 ANTI-DETECTION BYPASS METHOD
async fillElementWithAntiDetection({
element_id,
value,
clear_first = true,
force_focus = true,
}) {
const element = this.getElementById(element_id);
if (!element) {
throw new Error(`Element not found: ${element_id}`);
}
const hostname = window.location.hostname;
const platformConfig = this.detectAntiDetectionPlatform(hostname);
if (platformConfig && this.shouldUseBypass(element, platformConfig)) {
console.log(
`🎯 Using ${platformConfig.bypassMethod} bypass for ${hostname}`
);
return await this.executeDirectBypass(
element,
value,
platformConfig,
element_id
);
} else {
// Use normal fillElement for non-detection platforms
console.log("🔧 Using standard fill method");
return await this.fillElementStandard({
element_id,
value,
clear_first,
force_focus,
});
}
}
detectAntiDetectionPlatform(hostname) {
// Check exact matches first
if (ANTI_DETECTION_PLATFORMS[hostname]) {
return ANTI_DETECTION_PLATFORMS[hostname];
}
// Check subdomain matches
for (const [domain, config] of Object.entries(ANTI_DETECTION_PLATFORMS)) {
if (hostname.includes(domain) || hostname.endsWith(`.${domain}`)) {
return config;
}
}
return null;
}
shouldUseBypass(element, platformConfig) {
// Check if element matches platform-specific selectors
try {
const isTextarea =
document.querySelector(platformConfig.selectors.textarea) === element;
if (isTextarea) return true;
// Also check if element matches any textarea selector pattern
const textareaSelectors = platformConfig.selectors.textarea.split(", ");
return textareaSelectors.some((selector) => {
try {
return element.matches(selector);
} catch {
return false;
}
});
} catch (error) {
console.warn("Bypass detection failed:", error);
return false;
}
}
async executeDirectBypass(element, value, platformConfig, element_id) {
try {
console.log(`🐦 Executing ${platformConfig.bypassMethod} bypass`);
switch (platformConfig.bypassMethod) {
case "twitter_direct":
return await this.twitterDirectBypass(element, value, element_id);
case "linkedin_direct":
return await this.linkedinDirectBypass(element, value, element_id);
case "facebook_direct":
return await this.facebookDirectBypass(element, value, element_id);
default:
// Generic direct bypass
return await this.genericDirectBypass(element, value, element_id);
}
} catch (error) {
console.error(
"Direct bypass failed, falling back to standard method:",
error
);
return await this.fillElementStandard({
element_id,
value,
clear_first: true,
force_focus: true,
});
}
}
async twitterDirectBypass(element, value, element_id) {
// THE WORKING FORMULA FOR TWITTER:
// 1. Focus 2. Click 3. execCommand
console.log("🐦 Twitter direct bypass - focus+click+execCommand");
// Ensure element is in view
element.scrollIntoView({ behavior: "smooth", block: "center" });
await new Promise((r) => setTimeout(r, 200));
// The magic sequence that bypasses Twitter detection
element.focus();
element.click();
const execResult = document.execCommand("insertText", false, value);
// Wait for React state to update
await new Promise((r) => setTimeout(r, 500));
// Verify success
const currentText = element.textContent || element.value || "";
const success = currentText.includes(value);
return {
success: success,
element_id: element_id,
value: value,
actual_value: currentText,
method: "twitter_direct_bypass",
execCommand_result: execResult,
element_name: this.getElementName(element),
};
}
async linkedinDirectBypass(element, value, element_id) {
console.log("💼 LinkedIn direct bypass");
element.scrollIntoView({ behavior: "smooth", block: "center" });
await new Promise((r) => setTimeout(r, 200));
// LinkedIn-specific sequence
element.focus();
element.click();
// Clear existing content first for LinkedIn
if (element.textContent) {
document.execCommand("selectAll");
document.execCommand("delete");
}
const execResult = document.execCommand("insertText", false, value);
// LinkedIn needs more time for state updates
await new Promise((r) => setTimeout(r, 800));
const currentText = element.textContent || element.value || "";
const success = currentText.includes(value);
return {
success: success,
element_id: element_id,
value: value,
actual_value: currentText,
method: "linkedin_direct_bypass",
execCommand_result: execResult,
element_name: this.getElementName(element),
};
}
async facebookDirectBypass(element, value, element_id) {
console.log("📘 Facebook direct bypass");
element.scrollIntoView({ behavior: "smooth", block: "center" });
await new Promise((r) => setTimeout(r, 200));
// Facebook-specific sequence
element.focus();
element.click();
// Facebook may need selection clearing
if (element.textContent) {
document.execCommand("selectAll");
document.execCommand("delete");
}
const execResult = document.execCommand("insertText", false, value);
// Trigger Facebook-specific events
element.dispatchEvent(new Event("input", { bubbles: true }));
element.dispatchEvent(new Event("change", { bubbles: true }));
await new Promise((r) => setTimeout(r, 600));
const currentText = element.textContent || element.value || "";
const success = currentText.includes(value);
return {
success: success,
element_id: element_id,
value: value,
actual_value: currentText,
method: "facebook_direct_bypass",
execCommand_result: execResult,
element_name: this.getElementName(element),
};
}
async genericDirectBypass(element, value, element_id) {
console.log("🔧 Generic direct bypass");
element.scrollIntoView({ behavior: "smooth", block: "center" });
await new Promise((r) => setTimeout(r, 200));
// Generic direct sequence
element.focus();
element.click();
const execResult = document.execCommand("insertText", false, value);
await new Promise((r) => setTimeout(r, 500));
const currentText = element.textContent || element.value || "";
const success = currentText.includes(value);
return {
success: success,
element_id: element_id,
value: value,
actual_value: currentText,
method: "generic_direct_bypass",
execCommand_result: execResult,
element_name: this.getElementName(element),
};
}
// Standard fill method (unchanged for compatibility)
async fillElementStandard({
element_id,
value,
clear_first = true,
force_focus = true,
}) {
const element = this.getElementById(element_id);
if (!element) {
throw new Error(`Element not found: ${element_id}`);
}
// Enhanced focus sequence for modern web apps
if (force_focus) {
await this.ensureProperFocus(element);
} else {
element.focus();
}
// Clear existing content if requested
if (clear_first) {
await this.clearElementContent(element);
}
// Fill the value with proper event sequence
await this.fillWithEvents(element, value);
// Validate the fill was successful
const actualValue = this.getElementValue(element);
const success = actualValue.includes(value);
return {
success,
element_id,
value,
actual_value: actualValue,
element_name: this.getElementName(element),
method: "standard_fill",
focus_applied: force_focus,
};
}
async analyzePage({
intent_hint,
phase = "discover",
focus_areas,
element_ids,
max_results = 5,
}) {
2025-06-25 19:07:09 +02:00
const startTime = performance.now();
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Two-phase approach
2025-06-26 15:58:29 +02:00
if (phase === "discover") {
2025-06-25 19:07:09 +02:00
return await this.quickDiscovery({ intent_hint, max_results });
2025-06-26 15:58:29 +02:00
} else if (phase === "detailed") {
return await this.detailedAnalysis({
intent_hint,
focus_areas,
element_ids,
max_results,
});
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Legacy single-phase approach for backward compatibility
2025-06-26 15:58:29 +02:00
return await this.legacyAnalysis({
intent_hint,
focus_area: focus_areas?.[0],
max_results,
});
2025-06-25 19:07:09 +02:00
}
async quickDiscovery({ intent_hint, max_results = 5 }) {
const startTime = performance.now();
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Detect page type and get basic metrics
const pageType = this.detectPageType();
const viewportElements = this.countViewportElements();
2025-06-26 15:58:29 +02:00
2025-06-27 17:19:49 +02:00
// Use default max results limit
max_results = Math.min(max_results, 5);
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Try to find obvious matches using enhanced patterns
let quickMatches = [];
2025-06-26 15:58:29 +02:00
let usedMethod = "quick_discovery";
2025-06-25 19:07:09 +02:00
try {
2025-06-26 15:58:29 +02:00
// Check for anti-detection bypass first
const hostname = window.location.hostname;
const platformConfig = this.detectAntiDetectionPlatform(hostname);
if (
platformConfig &&
(intent_hint.includes("post") || intent_hint.includes("tweet"))
) {
const bypassResult = await this.tryAntiDetectionPatterns(
intent_hint,
platformConfig
);
if (bypassResult.confidence > 0.9) {
quickMatches = bypassResult.elements
.slice(0, 3)
.map((el) => this.compressElement(el, true));
usedMethod = "anti_detection_bypass";
}
}
// Fallback to enhanced patterns
if (
quickMatches.length === 0 &&
(bestMethod === "enhanced_pattern_match" ||
bestMethod === "pattern_database")
) {
2025-06-25 19:07:09 +02:00
const patternResult = await this.tryEnhancedPatterns(intent_hint);
if (patternResult.confidence > 0.7) {
2025-06-26 15:58:29 +02:00
quickMatches = patternResult.elements
.slice(0, 3)
.map((el) => this.compressElement(el, true));
usedMethod = "enhanced_patterns";
2025-06-25 19:07:09 +02:00
}
}
} catch (error) {
2025-06-26 15:58:29 +02:00
console.warn("Enhanced patterns failed:", error);
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// If no pattern matches, do a quick viewport scan
if (quickMatches.length === 0) {
quickMatches = await this.quickViewportScan(intent_hint, 3);
2025-06-26 15:58:29 +02:00
usedMethod = "viewport_scan";
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const intentMatch = this.scoreIntentMatch(intent_hint, quickMatches);
const suggestedAreas = this.suggestPhase2Areas(quickMatches, intent_hint);
const executionTime = Math.round(performance.now() - startTime);
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const result = {
summary: {
page_type: pageType,
intent_match: intentMatch,
element_count: viewportElements,
viewport_elements: quickMatches.length,
2025-06-26 15:58:29 +02:00
suggested_phase2: suggestedAreas,
anti_detection_platform: this.detectAntiDetectionPlatform(
window.location.hostname
)
? window.location.hostname
: null,
2025-06-25 19:07:09 +02:00
},
quick_matches: quickMatches,
token_estimate: this.estimatePhase2Tokens(quickMatches),
method: usedMethod,
execution_time: executionTime,
intent_hint: intent_hint, // Add this for server.js compatibility
2025-06-26 15:58:29 +02:00
elements: quickMatches, // Add this for backward compatibility
2025-06-25 19:07:09 +02:00
};
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return result;
}
2025-06-26 15:58:29 +02:00
async tryAntiDetectionPatterns(intent_hint, platformConfig) {
const elements = [];
// Try to find platform-specific elements
try {
const textareaElement = document.querySelector(
platformConfig.selectors.textarea
);
if (textareaElement && this.isLikelyVisible(textareaElement)) {
const elementId = this.registerElement(textareaElement);
elements.push({
id: elementId,
type: "textarea",
selector: platformConfig.selectors.textarea,
name: this.getElementName(textareaElement),
confidence: 0.95, // High confidence for anti-detection platforms
element: textareaElement,
});
}
const submitElement = document.querySelector(
platformConfig.selectors.submit
);
if (submitElement && this.isLikelyVisible(submitElement)) {
const elementId = this.registerElement(submitElement);
elements.push({
id: elementId,
type: "button",
selector: platformConfig.selectors.submit,
name: this.getElementName(submitElement),
confidence: 0.95,
element: submitElement,
});
}
} catch (error) {
console.warn("Anti-detection pattern matching failed:", error);
}
return {
elements,
confidence: elements.length > 0 ? 0.95 : 0,
method: "anti_detection_patterns",
platform: window.location.hostname,
};
}
async detailedAnalysis({
intent_hint,
focus_areas,
element_ids,
max_results = 10,
}) {
2025-06-25 19:07:09 +02:00
const startTime = performance.now();
const pageType = this.detectPageType();
2025-06-26 15:58:29 +02:00
2025-06-27 17:19:49 +02:00
// Use default max results limit
max_results = Math.min(max_results, 7); // Allow slightly more for detailed analysis
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
let elements = [];
2025-06-26 15:58:29 +02:00
let method = "detailed_analysis";
2025-06-25 19:07:09 +02:00
// Expand specific quick matches if provided
if (element_ids?.length) {
elements = await this.expandQuickMatches(element_ids);
2025-06-26 15:58:29 +02:00
method = "expanded_matches";
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Analyze specific focus areas
if (focus_areas?.length) {
2025-06-26 15:58:29 +02:00
const areaElements = await this.analyzeFocusAreas(
focus_areas,
intent_hint
);
2025-06-25 19:07:09 +02:00
elements = [...elements, ...areaElements];
2025-06-26 15:58:29 +02:00
method = elements.length > 0 ? "focus_area_analysis" : method;
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// If no specific analysis requested, do full enhanced analysis
if (elements.length === 0) {
elements = await this.fullEnhancedAnalysis(intent_hint, max_results);
2025-06-26 15:58:29 +02:00
method = "full_enhanced_analysis";
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Deduplicate and enhance with metadata
elements = this.deduplicateElements(elements);
elements = await this.enhanceElementMetadata(elements);
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Apply compact fingerprinting
2025-06-26 15:58:29 +02:00
elements = elements
.slice(0, max_results)
.map((el) => this.compressElement(el, false));
2025-06-25 19:07:09 +02:00
const executionTime = Math.round(performance.now() - startTime);
const result = {
elements,
2025-06-26 15:58:29 +02:00
interaction_ready: elements.every((el) => el.conf > 50),
2025-06-25 19:07:09 +02:00
method,
execution_time: executionTime,
2025-06-26 15:58:29 +02:00
intent_hint: intent_hint, // Add this for server.js compatibility
2025-06-25 19:07:09 +02:00
};
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return result;
}
async legacyAnalysis({ intent_hint, focus_area, max_results = 5 }) {
const startTime = performance.now();
let result;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
try {
// Try enhanced patterns first
result = await this.tryEnhancedPatterns(intent_hint);
if (result.confidence > 0.8) {
2025-06-26 15:58:29 +02:00
return this.formatAnalysisResult(
result,
"enhanced_patterns",
startTime
);
2025-06-25 19:07:09 +02:00
}
} catch (error) {
2025-06-26 15:58:29 +02:00
console.warn("Enhanced patterns failed, trying legacy patterns:", error);
2025-06-25 19:07:09 +02:00
try {
// Fallback to legacy pattern database
result = await this.tryPatternDatabase(intent_hint);
if (result.confidence > 0.8) {
2025-06-26 15:58:29 +02:00
return this.formatAnalysisResult(
result,
"pattern_database",
startTime
);
2025-06-25 19:07:09 +02:00
}
} catch (legacyError) {
2025-06-26 15:58:29 +02:00
console.warn("Legacy pattern database failed:", legacyError);
2025-06-25 19:07:09 +02:00
}
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Final fallback to semantic analysis
result = await this.trySemanticAnalysis(intent_hint, focus_area);
2025-06-26 15:58:29 +02:00
return this.formatAnalysisResult(result, "semantic_analysis", startTime);
2025-06-25 19:07:09 +02:00
}
async tryEnhancedPatterns(intent_hint) {
const [category, action] = this.parseIntent(intent_hint);
const pattern = ENHANCED_PATTERNS[category]?.[action];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
if (!pattern) {
return this.tryUniversalPatterns(intent_hint);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const elements = this.findPatternElements(pattern);
return {
elements: elements.slice(0, 3),
confidence: pattern.confidence,
2025-06-26 15:58:29 +02:00
method: "enhanced_pattern_match",
2025-06-25 19:07:09 +02:00
category,
2025-06-26 15:58:29 +02:00
action,
2025-06-25 19:07:09 +02:00
};
}
parseIntent(intent) {
const intentLower = intent.toLowerCase();
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Check for authentication patterns
2025-06-26 15:58:29 +02:00
if (
intentLower.includes("login") ||
intentLower.includes("sign in") ||
intentLower.includes("log in")
) {
return ["auth", "login"];
}
if (
intentLower.includes("signup") ||
intentLower.includes("sign up") ||
intentLower.includes("register") ||
intentLower.includes("create account")
) {
return ["auth", "signup"];
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Check for content creation patterns
2025-06-26 15:58:29 +02:00
if (
intentLower.includes("tweet") ||
intentLower.includes("post") ||
intentLower.includes("compose") ||
intentLower.includes("create") ||
intentLower.includes("write") ||
intentLower.includes("publish")
) {
return ["content", "post_create"];
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
if (intentLower.includes("comment") || intentLower.includes("reply")) {
return ["content", "comment"];
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Check for search patterns
2025-06-26 15:58:29 +02:00
if (
intentLower.includes("search") ||
intentLower.includes("find") ||
intentLower.includes("look for")
) {
return ["search", "global"];
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Check for navigation patterns
2025-06-26 15:58:29 +02:00
if (
intentLower.includes("menu") ||
intentLower.includes("navigation") ||
intentLower.includes("nav")
) {
return ["nav", "menu"];
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Check for form patterns
2025-06-26 15:58:29 +02:00
if (
intentLower.includes("submit") ||
intentLower.includes("send") ||
intentLower.includes("save")
) {
return ["form", "submit"];
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
if (
intentLower.includes("reset") ||
intentLower.includes("clear") ||
intentLower.includes("cancel")
) {
return ["form", "reset"];
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Fallback - try to infer from context
2025-06-26 15:58:29 +02:00
if (intentLower.includes("button") || intentLower.includes("click")) {
return ["form", "submit"];
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
if (
intentLower.includes("input") ||
intentLower.includes("field") ||
intentLower.includes("text")
) {
return ["content", "post_create"];
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Default fallback
2025-06-26 15:58:29 +02:00
return ["content", "post_create"]; // More useful default than search
2025-06-25 19:07:09 +02:00
}
findPatternElements(pattern) {
const elements = [];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
for (const [elementType, selectors] of Object.entries(pattern)) {
2025-06-26 15:58:29 +02:00
if (elementType === "confidence") continue;
2025-06-25 19:07:09 +02:00
for (const selector of selectors) {
const element = document.querySelector(selector);
if (element && this.isLikelyVisible(element)) {
const elementId = this.registerElement(element);
elements.push({
id: elementId,
type: elementType,
selector: selector,
name: this.getElementName(element),
confidence: pattern.confidence || 0.8,
2025-06-26 15:58:29 +02:00
element: element,
2025-06-25 19:07:09 +02:00
});
break; // Take first match per element type
}
}
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return elements;
}
tryUniversalPatterns(intent_hint) {
const intentLower = intent_hint.toLowerCase();
let selectors = [];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Content creation patterns
2025-06-26 15:58:29 +02:00
if (
intentLower.includes("tweet") ||
intentLower.includes("post") ||
intentLower.includes("compose") ||
intentLower.includes("create") ||
intentLower.includes("write")
) {
2025-06-25 19:07:09 +02:00
selectors = [
2025-06-26 15:58:29 +02:00
"[data-testid='tweetTextarea_0']", // Twitter first!
"[contenteditable='true']",
"textarea[placeholder*='tweet' i]",
2025-06-25 19:07:09 +02:00
"textarea[placeholder*='post' i]",
"textarea[placeholder*='what' i]",
"[data-text='true']",
"[role='textbox']",
2025-06-26 15:58:29 +02:00
"textarea:not([style*='display: none'])",
2025-06-25 19:07:09 +02:00
];
}
2025-06-26 15:58:29 +02:00
// Authentication patterns
else if (intentLower.includes("login") || intentLower.includes("sign in")) {
2025-06-25 19:07:09 +02:00
selectors = [
2025-06-26 15:58:29 +02:00
"[type='email']",
"[name*='username' i]",
2025-06-25 19:07:09 +02:00
"[placeholder*='email' i]",
"[placeholder*='username' i]",
2025-06-26 15:58:29 +02:00
"input[name*='login' i]",
2025-06-25 19:07:09 +02:00
];
2025-06-26 15:58:29 +02:00
} else if (
intentLower.includes("signup") ||
intentLower.includes("register")
) {
2025-06-25 19:07:09 +02:00
selectors = [
2025-06-26 15:58:29 +02:00
"[href*='signup']",
".signup-btn",
2025-06-25 19:07:09 +02:00
"[aria-label*='register' i]",
"button[data-testid*='signup' i]",
2025-06-26 15:58:29 +02:00
"a[href*='register']",
2025-06-25 19:07:09 +02:00
];
}
// Search patterns
2025-06-26 15:58:29 +02:00
else if (intentLower.includes("search") || intentLower.includes("find")) {
2025-06-25 19:07:09 +02:00
selectors = [
2025-06-26 15:58:29 +02:00
"[data-testid='SearchBox_Search_Input']", // Twitter search first
"[type='search']",
"[role='searchbox']",
2025-06-25 19:07:09 +02:00
"[placeholder*='search' i]",
"[data-testid*='search' i]",
2025-06-26 15:58:29 +02:00
"input[name*='search' i]",
2025-06-25 19:07:09 +02:00
];
}
// Generic fallback - look for interactive elements
else {
selectors = [
"button:not([disabled])",
"[contenteditable='true']",
"textarea",
"[type='submit']",
"[role='button']",
2025-06-26 15:58:29 +02:00
"input[type='text']",
2025-06-25 19:07:09 +02:00
];
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const elements = [];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
for (const selector of selectors) {
const foundElements = document.querySelectorAll(selector);
for (const element of foundElements) {
if (this.isLikelyVisible(element)) {
const elementId = this.registerElement(element);
elements.push({
id: elementId,
type: this.inferElementType(element, intent_hint),
selector: selector,
name: this.getElementName(element),
2025-06-26 15:58:29 +02:00
confidence:
0.5 + this.calculateConfidence(element, intent_hint) * 0.3,
element: element,
2025-06-25 19:07:09 +02:00
});
if (elements.length >= 3) break; // Limit to 3 elements
}
}
if (elements.length >= 3) break;
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return {
elements,
2025-06-26 15:58:29 +02:00
confidence:
elements.length > 0
? Math.max(...elements.map((e) => e.confidence))
: 0,
method: "universal_pattern",
2025-06-25 19:07:09 +02:00
};
2025-06-13 23:21:32 +02:00
}
2025-06-25 19:07:09 +02:00
async tryPatternDatabase(intentHint) {
const hostname = window.location.hostname;
const siteKey = this.detectSite(hostname);
2025-06-26 15:58:29 +02:00
if (siteKey === "universal") {
2025-06-25 19:07:09 +02:00
return this.getUniversalPattern(intentHint);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const siteConfig = PATTERN_DATABASE[siteKey];
const pattern = siteConfig?.patterns?.[intentHint];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
if (!pattern) {
throw new Error(`No pattern found for ${intentHint} on ${siteKey}`);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const elements = [];
for (const [elementType, selector] of Object.entries(pattern)) {
2025-06-26 15:58:29 +02:00
if (elementType === "confidence") continue;
2025-06-25 19:07:09 +02:00
const element = document.querySelector(selector);
if (element) {
const elementId = this.registerElement(element);
elements.push({
id: elementId,
type: elementType,
selector: selector,
name: this.getElementName(element),
2025-06-26 15:58:29 +02:00
confidence: pattern.confidence || 0.8,
2025-06-25 19:07:09 +02:00
});
}
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return {
elements,
confidence: pattern.confidence || 0.8,
2025-06-26 15:58:29 +02:00
site: siteKey,
2025-06-25 19:07:09 +02:00
};
}
detectSite(hostname) {
for (const [siteKey, config] of Object.entries(PATTERN_DATABASE)) {
2025-06-26 15:58:29 +02:00
if (siteKey === "universal") continue;
if (
config.domains?.some(
(domain) => hostname === domain || hostname.endsWith(`.${domain}`)
)
) {
2025-06-25 19:07:09 +02:00
return siteKey;
}
}
2025-06-26 15:58:29 +02:00
return "universal";
2025-06-25 19:07:09 +02:00
}
getUniversalPattern(intentHint) {
const universalPatterns = PATTERN_DATABASE.universal;
const pattern = universalPatterns[intentHint];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
if (!pattern) {
throw new Error(`No universal pattern for ${intentHint}`);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const elements = [];
for (const selector of pattern.selectors) {
const element = document.querySelector(selector);
if (element) {
const elementId = this.registerElement(element);
elements.push({
id: elementId,
type: intentHint,
selector: selector,
name: this.getElementName(element),
2025-06-26 15:58:29 +02:00
confidence: pattern.confidence,
2025-06-25 19:07:09 +02:00
});
break; // Take first match for universal patterns
}
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return {
elements,
confidence: pattern.confidence,
2025-06-26 15:58:29 +02:00
site: "universal",
2025-06-25 19:07:09 +02:00
};
}
async trySemanticAnalysis(intentHint, focusArea) {
const relevantElements = document.querySelectorAll(`
button, input, select, textarea, a[href],
[role="button"], [role="textbox"], [role="searchbox"],
[aria-label], [data-testid]
`);
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const elements = Array.from(relevantElements)
2025-06-26 15:58:29 +02:00
.filter((el) => this.isVisible(el))
2025-06-25 19:07:09 +02:00
.slice(0, 20)
2025-06-26 15:58:29 +02:00
.map((element) => {
2025-06-25 19:07:09 +02:00
const elementId = this.registerElement(element);
return {
id: elementId,
type: this.inferElementType(element, intentHint),
selector: this.generateSelector(element),
name: this.getElementName(element),
2025-06-26 15:58:29 +02:00
confidence: this.calculateConfidence(element, intentHint),
2025-06-25 19:07:09 +02:00
};
})
2025-06-26 15:58:29 +02:00
.filter((el) => el.confidence > 0.3)
2025-06-25 19:07:09 +02:00
.sort((a, b) => b.confidence - a.confidence);
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return {
elements,
2025-06-26 15:58:29 +02:00
confidence:
elements.length > 0
? Math.max(...elements.map((e) => e.confidence))
: 0,
2025-06-25 19:07:09 +02:00
};
}
async extractContent({ content_type, max_items = 20, summarize = true }) {
const startTime = performance.now();
const extractors = {
2025-06-26 15:58:29 +02:00
article: () => this.extractArticleContent(),
search_results: () => this.extractSearchResults(max_items),
posts: () => this.extractPosts(max_items),
2025-06-25 19:07:09 +02:00
};
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const extractor = extractors[content_type];
if (!extractor) {
throw new Error(`Unknown content type: ${content_type}`);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const rawContent = extractor();
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
if (summarize) {
// Return summary instead of full content to save tokens
return {
content_type,
summary: this.summarizeContent(rawContent, content_type),
items_found: Array.isArray(rawContent) ? rawContent.length : 1,
2025-06-26 15:58:29 +02:00
sample_items: Array.isArray(rawContent)
? rawContent.slice(0, 3)
: [rawContent],
2025-06-25 19:07:09 +02:00
extraction_method: this.getExtractionMethod(content_type),
token_estimate: this.estimateContentTokens(rawContent),
execution_time: Math.round(performance.now() - startTime),
2025-06-26 15:58:29 +02:00
extracted_at: new Date().toISOString(),
2025-06-25 19:07:09 +02:00
};
} else {
// Legacy full content extraction
return {
content: rawContent,
2025-06-26 15:58:29 +02:00
method: "semantic_extraction",
2025-06-25 19:07:09 +02:00
content_type: content_type,
execution_time: Math.round(performance.now() - startTime),
2025-06-26 15:58:29 +02:00
extracted_at: new Date().toISOString(),
2025-06-25 19:07:09 +02:00
};
}
}
extractArticleContent() {
2025-06-26 15:58:29 +02:00
const article = document.querySelector(
'article, [role="article"], .article-content, main'
);
const title = document
.querySelector("h1, .article-title, .post-title")
?.textContent?.trim();
2025-06-25 19:07:09 +02:00
const content = article?.textContent?.trim() || this.extractMainContent();
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return {
title,
content,
2025-06-26 15:58:29 +02:00
word_count: content?.split(/\s+/).length || 0,
2025-06-25 19:07:09 +02:00
};
}
extractMainContent() {
// Simple heuristic to find main content
2025-06-26 15:58:29 +02:00
const candidates = document.querySelectorAll(
"main, .content, .post-content, .article-body"
);
2025-06-25 19:07:09 +02:00
let bestCandidate = null;
let maxTextLength = 0;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
for (const candidate of candidates) {
const textLength = candidate.textContent.trim().length;
if (textLength > maxTextLength) {
maxTextLength = textLength;
bestCandidate = candidate;
}
}
2025-06-26 15:58:29 +02:00
return (
bestCandidate?.textContent?.trim() || document.body.textContent.trim()
);
2025-06-25 19:07:09 +02:00
}
extractSearchResults(max_items = 20) {
// Common search result patterns
const selectors = [
'.search-result, .result-item, [data-testid*="result"]',
2025-06-26 15:58:29 +02:00
".g, .result, .search-item", // Google-style
2025-06-25 19:07:09 +02:00
'li[data-testid="search-result"], .SearchResult', // Twitter/X
2025-06-26 15:58:29 +02:00
".Box-row, .issue-list-item", // GitHub
"article, .post, .entry", // Generic content
2025-06-25 19:07:09 +02:00
];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
let results = [];
for (const selector of selectors) {
const elements = document.querySelectorAll(selector);
if (elements.length > 0) {
2025-06-26 15:58:29 +02:00
results = Array.from(elements)
.slice(0, max_items)
.map((el, index) => ({
index: index + 1,
title: this.extractResultTitle(el),
summary: this.extractResultSummary(el),
link: this.extractResultLink(el),
type: this.detectResultType(el),
score: this.scoreSearchResult(el),
}));
2025-06-25 19:07:09 +02:00
break;
}
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return results;
}
extractPosts(max_items = 20) {
// Social media post patterns
const selectors = [
'[data-testid="tweet"], .tweet, .post',
'article[role="article"]', // Twitter/X posts
2025-06-26 15:58:29 +02:00
".timeline-item, .feed-item",
".status, .update, .entry",
2025-06-25 19:07:09 +02:00
];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
let posts = [];
for (const selector of selectors) {
const elements = document.querySelectorAll(selector);
if (elements.length > 0) {
2025-06-26 15:58:29 +02:00
posts = Array.from(elements)
.slice(0, max_items)
.map((el, index) => ({
index: index + 1,
text: this.extractPostText(el),
author: this.extractPostAuthor(el),
timestamp: this.extractPostTimestamp(el),
metrics: this.extractPostMetrics(el),
has_media: this.hasPostMedia(el),
post_type: this.detectPostType(el),
}));
2025-06-25 19:07:09 +02:00
break;
}
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return posts;
}
// Content summarization methods
summarizeContent(content, content_type) {
2025-06-26 15:58:29 +02:00
switch (content_type) {
case "article":
2025-06-25 19:07:09 +02:00
return this.summarizeArticle(content);
2025-06-26 15:58:29 +02:00
case "search_results":
2025-06-25 19:07:09 +02:00
return this.summarizeSearchResults(content);
2025-06-26 15:58:29 +02:00
case "posts":
2025-06-25 19:07:09 +02:00
return this.summarizePosts(content);
default:
2025-06-26 15:58:29 +02:00
return { summary: "Unknown content type" };
2025-06-25 19:07:09 +02:00
}
}
summarizeArticle(content) {
return {
2025-06-26 15:58:29 +02:00
title: content.title || "Untitled",
2025-06-25 19:07:09 +02:00
word_count: content.word_count || 0,
reading_time: Math.ceil((content.word_count || 0) / 200),
2025-06-26 15:58:29 +02:00
has_images: document.querySelectorAll("img").length > 0,
has_videos:
document.querySelectorAll(
'video, iframe[src*="youtube"], iframe[src*="vimeo"]'
).length > 0,
preview:
content.content?.substring(0, 200) +
(content.content?.length > 200 ? "..." : ""),
estimated_tokens: Math.ceil((content.content?.length || 0) / 4),
2025-06-25 19:07:09 +02:00
};
}
summarizeSearchResults(results) {
2025-06-26 15:58:29 +02:00
const domains = results
.map((r) => r.link)
.filter(Boolean)
.map((url) => {
try {
return new URL(url).hostname;
} catch {
return null;
}
})
.filter(Boolean);
2025-06-25 19:07:09 +02:00
return {
total_results: results.length,
2025-06-26 15:58:29 +02:00
result_types: [...new Set(results.map((r) => r.type))],
2025-06-25 19:07:09 +02:00
top_domains: this.getTopDomains(domains),
2025-06-26 15:58:29 +02:00
avg_score:
results.reduce((sum, r) => sum + (r.score || 0), 0) / results.length,
has_sponsored: results.some((r) => r.type === "sponsored"),
quality_score: this.calculateQualityScore(results),
2025-06-25 19:07:09 +02:00
};
}
summarizePosts(posts) {
2025-06-26 15:58:29 +02:00
const totalTextLength = posts.reduce(
(sum, p) => sum + (p.text?.length || 0),
0
);
const totalLikes = posts.reduce(
(sum, p) => sum + (p.metrics?.likes || 0),
0
);
2025-06-25 19:07:09 +02:00
return {
post_count: posts.length,
avg_length: Math.round(totalTextLength / posts.length),
2025-06-26 15:58:29 +02:00
has_media_count: posts.filter((p) => p.has_media).length,
2025-06-25 19:07:09 +02:00
engagement_total: totalLikes,
avg_engagement: Math.round(totalLikes / posts.length),
2025-06-26 15:58:29 +02:00
post_types: [...new Set(posts.map((p) => p.post_type))],
authors: [...new Set(posts.map((p) => p.author).filter(Boolean))].length,
estimated_tokens: Math.ceil(totalTextLength / 4),
2025-06-25 19:07:09 +02:00
};
}
// Helper methods for extraction
extractResultTitle(element) {
2025-06-26 15:58:29 +02:00
const titleSelectors = [
'h1, h2, h3, .title, .headline, [data-testid*="title"]',
];
2025-06-25 19:07:09 +02:00
for (const selector of titleSelectors) {
const title = element.querySelector(selector)?.textContent?.trim();
if (title) return title.substring(0, 100);
}
2025-06-26 15:58:29 +02:00
return element.textContent?.trim()?.substring(0, 50) || "No title";
2025-06-25 19:07:09 +02:00
}
extractResultSummary(element) {
2025-06-26 15:58:29 +02:00
const summarySelectors = [".summary, .description, .snippet, .excerpt"];
2025-06-25 19:07:09 +02:00
for (const selector of summarySelectors) {
const summary = element.querySelector(selector)?.textContent?.trim();
if (summary) return summary.substring(0, 200);
}
2025-06-26 15:58:29 +02:00
return element.textContent?.trim()?.substring(0, 150) || "";
2025-06-25 19:07:09 +02:00
}
extractResultLink(element) {
2025-06-26 15:58:29 +02:00
const link =
element.querySelector("a[href]")?.href ||
element.closest("a[href]")?.href ||
element.getAttribute("href");
2025-06-25 19:07:09 +02:00
return link || null;
}
detectResultType(element) {
2025-06-26 15:58:29 +02:00
if (
element.textContent?.toLowerCase().includes("sponsored") ||
element.querySelector(".ad, .sponsored")
)
return "sponsored";
if (element.querySelector("img, video")) return "media";
if (element.querySelector(".price, .cost")) return "product";
return "organic";
2025-06-25 19:07:09 +02:00
}
scoreSearchResult(element) {
let score = 0.5;
2025-06-26 15:58:29 +02:00
if (element.querySelector("h1, h2, h3")) score += 0.2;
if (element.querySelector("img")) score += 0.1;
2025-06-25 19:07:09 +02:00
if (element.textContent?.length > 100) score += 0.1;
2025-06-26 15:58:29 +02:00
if (element.querySelector("a[href]")) score += 0.1;
2025-06-25 19:07:09 +02:00
return Math.min(score, 1.0);
}
extractPostText(element) {
const textSelectors = [
'[data-testid="tweetText"], .tweet-text',
2025-06-26 15:58:29 +02:00
".post-content, .entry-content",
".status-content, .message-content",
2025-06-25 19:07:09 +02:00
];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
for (const selector of textSelectors) {
const text = element.querySelector(selector)?.textContent?.trim();
if (text) return text.substring(0, 280);
}
2025-06-26 15:58:29 +02:00
return element.textContent?.trim()?.substring(0, 280) || "";
2025-06-25 19:07:09 +02:00
}
extractPostAuthor(element) {
const authorSelectors = [
'[data-testid="User-Name"], .username',
2025-06-26 15:58:29 +02:00
".author, .user-name, .handle",
2025-06-25 19:07:09 +02:00
];
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
for (const selector of authorSelectors) {
const author = element.querySelector(selector)?.textContent?.trim();
if (author) return author.substring(0, 50);
}
2025-06-26 15:58:29 +02:00
return "Unknown";
2025-06-25 19:07:09 +02:00
}
extractPostTimestamp(element) {
const timeSelectors = ['time, .timestamp, .date, [data-testid*="time"]'];
for (const selector of timeSelectors) {
const time = element.querySelector(selector);
if (time) {
2025-06-26 15:58:29 +02:00
return (
time.getAttribute("datetime") || time.textContent?.trim() || null
);
2025-06-25 19:07:09 +02:00
}
}
return null;
}
extractPostMetrics(element) {
const metrics = {};
const likeSelectors = ['[data-testid*="like"], .like-count, .heart-count'];
2025-06-26 15:58:29 +02:00
const replySelectors = [
'[data-testid*="reply"], .reply-count, .comment-count',
];
const shareSelectors = [
'[data-testid*="retweet"], .share-count, .repost-count',
];
2025-06-25 19:07:09 +02:00
for (const selector of likeSelectors) {
2025-06-26 15:58:29 +02:00
const likes = element
.querySelector(selector)
?.textContent?.match(/\d+/)?.[0];
2025-06-25 19:07:09 +02:00
if (likes) metrics.likes = parseInt(likes);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
for (const selector of replySelectors) {
2025-06-26 15:58:29 +02:00
const replies = element
.querySelector(selector)
?.textContent?.match(/\d+/)?.[0];
2025-06-25 19:07:09 +02:00
if (replies) metrics.replies = parseInt(replies);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
for (const selector of shareSelectors) {
2025-06-26 15:58:29 +02:00
const shares = element
.querySelector(selector)
?.textContent?.match(/\d+/)?.[0];
2025-06-25 19:07:09 +02:00
if (shares) metrics.shares = parseInt(shares);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return metrics;
}
hasPostMedia(element) {
return element.querySelector('img, video, [data-testid*="media"]') !== null;
}
detectPostType(element) {
2025-06-26 15:58:29 +02:00
if (element.querySelector('[data-testid*="retweet"]')) return "repost";
if (element.querySelector('[data-testid*="reply"]')) return "reply";
if (element.hasAttribute("data-promoted")) return "promoted";
return "original";
2025-06-25 19:07:09 +02:00
}
getTopDomains(domains, limit = 5) {
const domainCounts = {};
2025-06-26 15:58:29 +02:00
domains.forEach((domain) => {
2025-06-25 19:07:09 +02:00
domainCounts[domain] = (domainCounts[domain] || 0) + 1;
});
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return Object.entries(domainCounts)
2025-06-26 15:58:29 +02:00
.sort(([, a], [, b]) => b - a)
2025-06-25 19:07:09 +02:00
.slice(0, limit)
.map(([domain, count]) => ({ domain, count }));
}
calculateQualityScore(results) {
2025-06-26 15:58:29 +02:00
const avgScore =
results.reduce((sum, r) => sum + (r.score || 0), 0) / results.length;
const hasLinks = results.filter((r) => r.link).length / results.length;
const hasContent =
results.filter((r) => r.summary?.length > 50).length / results.length;
return Math.round(
(avgScore * 0.4 + hasLinks * 0.3 + hasContent * 0.3) * 100
);
2025-06-25 19:07:09 +02:00
}
getExtractionMethod(content_type) {
const hostname = window.location.hostname;
2025-06-26 15:58:29 +02:00
if (hostname.includes("twitter") || hostname.includes("x.com"))
return "twitter_patterns";
if (hostname.includes("github")) return "github_patterns";
if (hostname.includes("google")) return "google_patterns";
2025-06-25 19:07:09 +02:00
return `semantic_${content_type}`;
}
estimateContentTokens(content) {
if (Array.isArray(content)) {
return content.reduce((sum, item) => {
return sum + Math.ceil(JSON.stringify(item).length / 4);
}, 0);
} else {
return Math.ceil(JSON.stringify(content).length / 4);
}
}
2025-06-26 15:58:29 +02:00
async clickElement({ element_id, click_type = "left", wait_after = 500 }) {
2025-06-25 19:07:09 +02:00
const element = this.getElementById(element_id);
if (!element) {
throw new Error(`Element not found: ${element_id}`);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Scroll element into view
2025-06-26 15:58:29 +02:00
element.scrollIntoView({ behavior: "smooth", block: "center" });
await new Promise((resolve) => setTimeout(resolve, 200));
2025-06-25 19:07:09 +02:00
// Click the element
2025-06-26 15:58:29 +02:00
if (click_type === "right") {
element.dispatchEvent(new MouseEvent("contextmenu", { bubbles: true }));
2025-06-25 19:07:09 +02:00
} else {
element.click();
}
2025-06-26 15:58:29 +02:00
await new Promise((resolve) => setTimeout(resolve, wait_after));
2025-06-25 19:07:09 +02:00
return {
success: true,
element_id,
click_type,
element_name: this.getElementName(element),
};
}
async ensureProperFocus(element) {
// Scroll element into view first
2025-06-26 15:58:29 +02:00
element.scrollIntoView({ behavior: "smooth", block: "center" });
await new Promise((resolve) => setTimeout(resolve, 200));
2025-06-25 19:07:09 +02:00
// Simulate proper mouse interaction sequence
const rect = element.getBoundingClientRect();
const centerX = rect.left + rect.width / 2;
const centerY = rect.top + rect.height / 2;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Fire mouse events in sequence
2025-06-26 15:58:29 +02:00
element.dispatchEvent(
new MouseEvent("mousedown", {
bubbles: true,
clientX: centerX,
clientY: centerY,
})
);
element.dispatchEvent(
new MouseEvent("mouseup", {
bubbles: true,
clientX: centerX,
clientY: centerY,
})
);
element.dispatchEvent(
new MouseEvent("click", {
bubbles: true,
clientX: centerX,
clientY: centerY,
})
);
2025-06-25 19:07:09 +02:00
// Focus and fire focus events
element.focus();
2025-06-26 15:58:29 +02:00
element.dispatchEvent(new FocusEvent("focusin", { bubbles: true }));
element.dispatchEvent(new FocusEvent("focus", { bubbles: true }));
2025-06-25 19:07:09 +02:00
// Wait for React/framework to update
2025-06-26 15:58:29 +02:00
await new Promise((resolve) => setTimeout(resolve, 100));
2025-06-25 19:07:09 +02:00
}
async clearElementContent(element) {
2025-06-26 15:58:29 +02:00
if (element.tagName === "INPUT" || element.tagName === "TEXTAREA") {
element.value = "";
element.dispatchEvent(new Event("input", { bubbles: true }));
} else if (element.contentEditable === "true") {
2025-06-25 19:07:09 +02:00
// For contenteditable, simulate selecting all and deleting
element.focus();
2025-06-26 15:58:29 +02:00
document.execCommand("selectAll");
document.execCommand("delete");
element.dispatchEvent(new Event("input", { bubbles: true }));
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
await new Promise((resolve) => setTimeout(resolve, 50));
2025-06-25 19:07:09 +02:00
}
async fillWithEvents(element, value) {
2025-06-26 15:58:29 +02:00
if (element.tagName === "INPUT" || element.tagName === "TEXTAREA") {
2025-06-25 19:07:09 +02:00
// Set value and fire comprehensive events
element.value = value;
2025-06-26 15:58:29 +02:00
element.dispatchEvent(new Event("beforeinput", { bubbles: true }));
element.dispatchEvent(new Event("input", { bubbles: true }));
element.dispatchEvent(new Event("change", { bubbles: true }));
2025-06-25 19:07:09 +02:00
// Fire keyboard events to simulate typing completion
2025-06-26 15:58:29 +02:00
element.dispatchEvent(
new KeyboardEvent("keydown", { bubbles: true, key: "End" })
);
element.dispatchEvent(
new KeyboardEvent("keyup", { bubbles: true, key: "End" })
);
} else if (element.contentEditable === "true") {
2025-06-25 19:07:09 +02:00
// For contenteditable elements (like Twitter)
element.textContent = value;
2025-06-26 15:58:29 +02:00
element.dispatchEvent(new Event("beforeinput", { bubbles: true }));
element.dispatchEvent(new Event("input", { bubbles: true }));
2025-06-25 19:07:09 +02:00
// Trigger composition events for better compatibility
2025-06-26 15:58:29 +02:00
element.dispatchEvent(
new CompositionEvent("compositionend", {
bubbles: true,
data: value,
})
);
2025-06-25 19:07:09 +02:00
// Fire selection change to notify frameworks
2025-06-26 15:58:29 +02:00
document.dispatchEvent(new Event("selectionchange"));
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
await new Promise((resolve) => setTimeout(resolve, 100));
2025-06-25 19:07:09 +02:00
}
getElementValue(element) {
2025-06-26 15:58:29 +02:00
if (element.tagName === "INPUT" || element.tagName === "TEXTAREA") {
2025-06-25 19:07:09 +02:00
return element.value;
2025-06-26 15:58:29 +02:00
} else if (element.contentEditable === "true") {
return element.textContent || element.innerText || "";
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
return "";
2025-06-25 19:07:09 +02:00
}
// Element state detection methods
getElementState(element) {
const state = {
disabled: this.isElementDisabled(element),
visible: this.isLikelyVisible(element),
clickable: this.isElementClickable(element),
focusable: this.isElementFocusable(element),
hasText: this.hasText(element),
2025-06-26 15:58:29 +02:00
isEmpty: this.isEmpty(element),
2025-06-25 19:07:09 +02:00
};
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Overall interaction readiness
2025-06-26 15:58:29 +02:00
state.interaction_ready =
state.visible && !state.disabled && (state.clickable || state.focusable);
2025-06-25 19:07:09 +02:00
return state;
}
isElementDisabled(element) {
// Check disabled attribute
if (element.disabled === true) return true;
2025-06-26 15:58:29 +02:00
if (element.getAttribute("disabled") !== null) return true;
2025-06-25 19:07:09 +02:00
// Check aria-disabled
2025-06-26 15:58:29 +02:00
if (element.getAttribute("aria-disabled") === "true") return true;
2025-06-25 19:07:09 +02:00
// Check common disabled classes
2025-06-26 15:58:29 +02:00
const disabledClasses = [
"disabled",
"btn-disabled",
"button-disabled",
"inactive",
];
2025-06-25 19:07:09 +02:00
const classList = Array.from(element.classList);
2025-06-26 15:58:29 +02:00
if (disabledClasses.some((cls) => classList.includes(cls))) return true;
2025-06-25 19:07:09 +02:00
// Check if parent form/fieldset is disabled
2025-06-26 15:58:29 +02:00
const parentFieldset = element.closest("fieldset[disabled]");
2025-06-25 19:07:09 +02:00
if (parentFieldset) return true;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Check computed styles for pointer-events: none
const computedStyle = getComputedStyle(element);
2025-06-26 15:58:29 +02:00
if (computedStyle.pointerEvents === "none") return true;
2025-06-25 19:07:09 +02:00
return false;
}
isElementClickable(element) {
2025-06-26 15:58:29 +02:00
const clickableTags = ["BUTTON", "A", "INPUT"];
const clickableTypes = ["button", "submit", "reset"];
const clickableRoles = ["button", "link", "menuitem", "tab"];
2025-06-25 19:07:09 +02:00
// Check tag and type
if (clickableTags.includes(element.tagName)) return true;
if (element.type && clickableTypes.includes(element.type)) return true;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Check role
2025-06-26 15:58:29 +02:00
const role = element.getAttribute("role");
2025-06-25 19:07:09 +02:00
if (role && clickableRoles.includes(role)) return true;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Check for click handlers
2025-06-26 15:58:29 +02:00
if (element.onclick || element.getAttribute("onclick")) return true;
2025-06-25 19:07:09 +02:00
// Check for common clickable classes
2025-06-26 15:58:29 +02:00
const clickableClasses = ["btn", "button", "clickable", "link"];
2025-06-25 19:07:09 +02:00
const classList = Array.from(element.classList);
2025-06-26 15:58:29 +02:00
if (clickableClasses.some((cls) => classList.includes(cls))) return true;
2025-06-25 19:07:09 +02:00
return false;
}
isElementFocusable(element) {
2025-06-26 15:58:29 +02:00
const focusableTags = ["INPUT", "TEXTAREA", "SELECT", "BUTTON", "A"];
2025-06-25 19:07:09 +02:00
// Check if element is naturally focusable
if (focusableTags.includes(element.tagName)) return true;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
// Check tabindex
2025-06-26 15:58:29 +02:00
const tabindex = element.getAttribute("tabindex");
if (tabindex && tabindex !== "-1") return true;
2025-06-25 19:07:09 +02:00
// Check contenteditable
2025-06-26 15:58:29 +02:00
if (element.contentEditable === "true") return true;
2025-06-25 19:07:09 +02:00
// Check role
2025-06-26 15:58:29 +02:00
const focusableRoles = ["textbox", "searchbox", "button", "link"];
const role = element.getAttribute("role");
2025-06-25 19:07:09 +02:00
if (role && focusableRoles.includes(role)) return true;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return false;
}
hasText(element) {
2025-06-26 15:58:29 +02:00
const text =
element.textContent ||
element.value ||
element.getAttribute("aria-label") ||
"";
2025-06-25 19:07:09 +02:00
return text.trim().length > 0;
}
isEmpty(element) {
2025-06-26 15:58:29 +02:00
if (element.tagName === "INPUT" || element.tagName === "TEXTAREA") {
2025-06-25 19:07:09 +02:00
return !element.value || element.value.trim().length === 0;
}
2025-06-26 15:58:29 +02:00
if (element.contentEditable === "true") {
2025-06-25 19:07:09 +02:00
return !element.textContent || element.textContent.trim().length === 0;
}
return false;
}
async waitForCondition({ condition_type, selector, text, timeout = 5000 }) {
const startTime = Date.now();
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const conditions = {
2025-06-26 15:58:29 +02:00
element_visible: () => {
2025-06-25 19:07:09 +02:00
const el = document.querySelector(selector);
return el && el.offsetParent !== null;
},
2025-06-26 15:58:29 +02:00
text_present: () => document.body.textContent.includes(text),
2025-06-25 19:07:09 +02:00
};
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const checkCondition = conditions[condition_type];
if (!checkCondition) {
throw new Error(`Unknown condition type: ${condition_type}`);
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
while (Date.now() - startTime < timeout) {
if (checkCondition()) {
return {
condition_met: true,
2025-06-26 15:58:29 +02:00
wait_time: Date.now() - startTime,
2025-06-25 19:07:09 +02:00
};
}
2025-06-26 15:58:29 +02:00
await new Promise((resolve) => setTimeout(resolve, 100));
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
throw new Error(`Timeout waiting for condition: ${condition_type}`);
}
// Utility methods
registerElement(element) {
const id = `element_${++this.idCounter}`;
this.elementRegistry.set(id, element);
return id;
}
getElementById(id) {
// Check quick registry first (for q1, q2, etc.)
2025-06-26 15:58:29 +02:00
if (id.startsWith("q")) {
2025-06-25 19:07:09 +02:00
return this.quickRegistry.get(id);
}
// Then check main registry (for element_1, element_2, etc.)
return this.elementRegistry.get(id);
}
getElementName(element) {
2025-06-26 15:58:29 +02:00
return (
element.getAttribute("aria-label") ||
element.getAttribute("title") ||
element.textContent?.trim()?.substring(0, 50) ||
element.placeholder ||
element.tagName.toLowerCase()
);
2025-06-25 19:07:09 +02:00
}
isVisible(element) {
2025-06-26 15:58:29 +02:00
return (
element.offsetParent !== null &&
getComputedStyle(element).visibility !== "hidden" &&
getComputedStyle(element).opacity !== "0"
);
2025-06-25 19:07:09 +02:00
}
generateSelector(element) {
if (element.id) return `#${element.id}`;
2025-06-26 15:58:29 +02:00
if (element.getAttribute("data-testid"))
return `[data-testid="${element.getAttribute("data-testid")}"]`;
2025-06-25 19:07:09 +02:00
let selector = element.tagName.toLowerCase();
if (element.className) {
2025-06-26 15:58:29 +02:00
selector += `.${element.className.split(" ").join(".")}`;
2025-06-25 19:07:09 +02:00
}
return selector;
}
inferElementType(element, intentHint) {
const tagName = element.tagName.toLowerCase();
2025-06-26 15:58:29 +02:00
const role = element.getAttribute("role");
const type = element.getAttribute("type");
if (tagName === "input" && type === "search") return "search_input";
if (tagName === "input") return "input";
if (tagName === "textarea") return "textarea";
if (tagName === "button" || role === "button") return "button";
if (tagName === "a") return "link";
return "element";
2025-06-25 19:07:09 +02:00
}
calculateConfidence(element, intentHint) {
let confidence = 0.5;
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const text = this.getElementName(element).toLowerCase();
const hint = intentHint.toLowerCase();
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
if (text.includes(hint)) confidence += 0.3;
2025-06-26 15:58:29 +02:00
if (element.getAttribute("data-testid")) confidence += 0.2;
if (element.getAttribute("aria-label")) confidence += 0.1;
2025-06-25 19:07:09 +02:00
return Math.min(confidence, 1.0);
}
formatAnalysisResult(result, method, startTime) {
return {
...result,
method,
execution_time: Math.round(performance.now() - startTime),
2025-06-26 15:58:29 +02:00
analyzed_at: new Date().toISOString(),
2025-06-25 19:07:09 +02:00
};
}
// Two-phase utility methods
compressElement(element, isQuick = false) {
const actualElement = element.element || element;
const state = this.getElementState(actualElement);
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
if (isQuick) {
// Quick phase - minimal data with state
const quickId = `q${++this.quickIdCounter}`;
this.quickRegistry.set(quickId, actualElement);
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return {
id: quickId,
2025-06-26 15:58:29 +02:00
type: element.type || "element",
name: element.name?.substring(0, 20) || "unnamed",
2025-06-25 19:07:09 +02:00
conf: Math.round((element.confidence || 0.5) * 100),
2025-06-26 15:58:29 +02:00
selector: element.selector || "unknown",
state: state.disabled ? "disabled" : "enabled",
2025-06-25 19:07:09 +02:00
clickable: state.clickable,
2025-06-26 15:58:29 +02:00
ready: state.interaction_ready,
2025-06-25 19:07:09 +02:00
};
} else {
// Detailed phase - compact fingerprint with full state
return {
id: element.id,
fp: this.generateFingerprint(actualElement),
2025-06-26 15:58:29 +02:00
name: element.name?.substring(0, 30) || "unnamed",
2025-06-25 19:07:09 +02:00
conf: Math.round((element.confidence || 0.5) * 100),
meta: {
...this.getElementMeta(actualElement),
2025-06-26 15:58:29 +02:00
state: state,
},
2025-06-25 19:07:09 +02:00
};
}
}
generateFingerprint(element) {
const tag = element.tagName.toLowerCase();
const primaryClass = this.getPrimaryClass(element);
const context = this.getContext(element);
const position = this.getRelativePosition(element);
2025-06-26 15:58:29 +02:00
return `${tag}${
primaryClass ? "." + primaryClass : ""
}@${context}.${position}`;
2025-06-25 19:07:09 +02:00
}
getPrimaryClass(element) {
2025-06-26 15:58:29 +02:00
const importantClasses = [
"btn",
"button",
"link",
"input",
"search",
"submit",
"primary",
"secondary",
];
2025-06-25 19:07:09 +02:00
const classList = Array.from(element.classList);
2025-06-26 15:58:29 +02:00
return (
classList.find((cls) => importantClasses.includes(cls)) || classList[0]
);
2025-06-25 19:07:09 +02:00
}
getContext(element) {
2025-06-26 15:58:29 +02:00
const parent =
element.closest("nav, main, header, footer, form, section, article") ||
element.parentElement;
if (!parent) return "body";
2025-06-25 19:07:09 +02:00
return parent.tagName.toLowerCase();
}
getRelativePosition(element) {
const siblings = Array.from(element.parentElement?.children || []);
2025-06-26 15:58:29 +02:00
const sameTypeElements = siblings.filter(
(el) => el.tagName === element.tagName
);
2025-06-25 19:07:09 +02:00
return sameTypeElements.indexOf(element) + 1;
}
getElementMeta(element) {
const rect = element.getBoundingClientRect();
return {
2025-06-26 15:58:29 +02:00
rect: [
Math.round(rect.x),
Math.round(rect.y),
Math.round(rect.width),
Math.round(rect.height),
],
2025-06-25 19:07:09 +02:00
visible: this.isLikelyVisible(element),
2025-06-26 15:58:29 +02:00
form_context: element.closest("form") ? "form" : null,
2025-06-25 19:07:09 +02:00
};
}
detectPageType() {
const hostname = window.location.hostname;
const title = document.title.toLowerCase();
2025-06-26 15:58:29 +02:00
const hasSearch = document.querySelector(
'[type="search"], [role="searchbox"]'
);
const hasLogin = document.querySelector(
'[type="password"], [name*="login" i]'
);
const hasPost = document.querySelector(
'[contenteditable="true"], textarea[placeholder*="post" i]'
);
if (hostname.includes("twitter") || hostname.includes("x.com"))
return "social_media";
if (hostname.includes("github")) return "code_repository";
if (hostname.includes("google")) return "search_engine";
if (hasPost) return "content_creation";
if (hasLogin) return "authentication";
if (hasSearch) return "search_interface";
if (title.includes("shop") || title.includes("store")) return "ecommerce";
return "general_website";
2025-06-25 19:07:09 +02:00
}
countViewportElements() {
2025-06-26 15:58:29 +02:00
const elements = document.querySelectorAll(
"button, input, select, textarea, a[href]"
);
const viewportElements = Array.from(elements).filter((el) =>
this.isLikelyVisible(el)
);
2025-06-25 19:07:09 +02:00
return {
2025-06-26 15:58:29 +02:00
buttons: viewportElements.filter(
(el) => el.tagName === "BUTTON" || el.getAttribute("role") === "button"
).length,
inputs: viewportElements.filter((el) => el.tagName === "INPUT").length,
links: viewportElements.filter((el) => el.tagName === "A").length,
textareas: viewportElements.filter((el) => el.tagName === "TEXTAREA")
.length,
selects: viewportElements.filter((el) => el.tagName === "SELECT").length,
2025-06-25 19:07:09 +02:00
};
}
async quickViewportScan(intent_hint, maxResults = 3) {
2025-06-26 15:58:29 +02:00
const candidates = document.querySelectorAll(
'button, input, a[href], [role="button"], textarea'
);
2025-06-25 19:07:09 +02:00
const visibleElements = Array.from(candidates)
2025-06-26 15:58:29 +02:00
.filter((el) => this.isLikelyVisible(el))
2025-06-25 19:07:09 +02:00
.slice(0, 10); // Limit scan to first 10 visible elements
2025-06-26 15:58:29 +02:00
const scoredElements = visibleElements.map((element) => {
2025-06-25 19:07:09 +02:00
const confidence = this.calculateConfidence(element, intent_hint);
return {
element,
type: this.inferElementType(element, intent_hint),
name: this.getElementName(element),
2025-06-26 15:58:29 +02:00
confidence,
2025-06-25 19:07:09 +02:00
};
});
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return scoredElements
2025-06-26 15:58:29 +02:00
.filter((el) => el.confidence > 0.3)
2025-06-25 19:07:09 +02:00
.sort((a, b) => b.confidence - a.confidence)
.slice(0, maxResults)
2025-06-26 15:58:29 +02:00
.map((el) => this.compressElement(el, true));
2025-06-25 19:07:09 +02:00
}
scoreIntentMatch(intent_hint, quickMatches) {
2025-06-26 15:58:29 +02:00
if (quickMatches.length === 0) return "none";
const avgConfidence =
quickMatches.reduce((sum, match) => sum + match.conf, 0) /
quickMatches.length;
if (avgConfidence >= 80) return "high";
if (avgConfidence >= 60) return "medium";
if (avgConfidence >= 40) return "low";
return "none";
2025-06-25 19:07:09 +02:00
}
suggestPhase2Areas(quickMatches, intent_hint) {
const suggestions = [];
2025-06-26 15:58:29 +02:00
const elementTypes = [...new Set(quickMatches.map((m) => m.type))];
if (elementTypes.includes("button")) suggestions.push("buttons");
if (elementTypes.includes("input") || elementTypes.includes("textarea"))
suggestions.push("forms");
if (elementTypes.includes("link")) suggestions.push("navigation");
2025-06-25 19:07:09 +02:00
// Intent-based suggestions
2025-06-26 15:58:29 +02:00
if (
intent_hint.toLowerCase().includes("search") &&
!suggestions.includes("forms")
) {
suggestions.push("search_elements");
2025-06-25 19:07:09 +02:00
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return suggestions.slice(0, 3);
}
estimatePhase2Tokens(quickMatches) {
// Estimate tokens needed for detailed analysis
const baseTokens = 50; // Base overhead
const tokensPerElement = 15; // Detailed element info
const contextTokens = 20; // Page context
2025-06-26 15:58:29 +02:00
return baseTokens + quickMatches.length * tokensPerElement + contextTokens;
2025-06-25 19:07:09 +02:00
}
async expandQuickMatches(element_ids) {
const elements = [];
for (const id of element_ids) {
const element = this.quickRegistry.get(id);
if (element) {
const elementId = this.registerElement(element);
elements.push({
id: elementId,
2025-06-26 15:58:29 +02:00
type: this.inferElementType(element, ""),
2025-06-25 19:07:09 +02:00
name: this.getElementName(element),
confidence: 0.8, // Default confidence for expanded elements
2025-06-26 15:58:29 +02:00
element: element,
2025-06-25 19:07:09 +02:00
});
}
}
return elements;
}
async analyzeFocusAreas(focus_areas, intent_hint) {
const elements = [];
const areaSelectors = {
2025-06-26 15:58:29 +02:00
buttons: 'button, [role="button"], input[type="submit"]',
forms: 'input, textarea, select, [contenteditable="true"]',
navigation: 'nav a, .nav-item, [role="navigation"] a',
search_elements:
'[type="search"], [role="searchbox"], [placeholder*="search" i]',
2025-06-25 19:07:09 +02:00
};
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
for (const area of focus_areas) {
const selector = areaSelectors[area];
if (selector) {
const areaElements = document.querySelectorAll(selector);
for (const element of Array.from(areaElements).slice(0, 5)) {
if (this.isLikelyVisible(element)) {
const elementId = this.registerElement(element);
elements.push({
id: elementId,
type: this.inferElementType(element, intent_hint),
name: this.getElementName(element),
confidence: this.calculateConfidence(element, intent_hint),
2025-06-26 15:58:29 +02:00
element: element,
2025-06-25 19:07:09 +02:00
});
}
}
}
}
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return elements;
}
async fullEnhancedAnalysis(intent_hint, max_results) {
// Enhanced version of semantic analysis with better filtering
const relevantElements = document.querySelectorAll(`
button, input, select, textarea, a[href],
[role="button"], [role="textbox"], [role="searchbox"],
[aria-label], [data-testid], [contenteditable="true"]
`);
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
const elements = Array.from(relevantElements)
2025-06-26 15:58:29 +02:00
.filter((el) => this.isLikelyVisible(el))
2025-06-25 19:07:09 +02:00
.slice(0, 30) // Analyze more elements than before
2025-06-26 15:58:29 +02:00
.map((element) => {
2025-06-25 19:07:09 +02:00
const elementId = this.registerElement(element);
return {
id: elementId,
type: this.inferElementType(element, intent_hint),
selector: this.generateSelector(element),
name: this.getElementName(element),
confidence: this.calculateConfidence(element, intent_hint),
2025-06-26 15:58:29 +02:00
element: element,
2025-06-25 19:07:09 +02:00
};
})
2025-06-26 15:58:29 +02:00
.filter((el) => el.confidence > 0.2) // Lower threshold for detailed analysis
2025-06-25 19:07:09 +02:00
.sort((a, b) => b.confidence - a.confidence);
2025-06-26 15:58:29 +02:00
2025-06-25 19:07:09 +02:00
return elements.slice(0, max_results);
}
deduplicateElements(elements) {
const seen = new Set();
2025-06-26 15:58:29 +02:00
return elements.filter((element) => {
2025-06-25 19:07:09 +02:00
const key = element.name + element.type;
if (seen.has(key)) return false;
seen.add(key);
return true;
});
}
async enhanceElementMetadata(elements) {
2025-06-26 15:58:29 +02:00
return elements.map((element) => ({
2025-06-25 19:07:09 +02:00
...element,
2025-06-26 15:58:29 +02:00
meta: this.getElementMeta(element.element),
2025-06-25 19:07:09 +02:00
}));
}
isLikelyVisible(element) {
const rect = element.getBoundingClientRect();
const style = getComputedStyle(element);
2025-06-26 15:58:29 +02:00
return (
rect.top < window.innerHeight &&
rect.bottom > 0 &&
rect.left < window.innerWidth &&
rect.right > 0 &&
style.visibility !== "hidden" &&
style.opacity !== "0" &&
style.display !== "none"
);
2025-06-25 19:07:09 +02:00
}
estimateTokenUsage(result) {
// Estimate token count based on result size
const jsonString = JSON.stringify(result);
return Math.ceil(jsonString.length / 4); // Rough estimate: 4 chars per token
}
2025-06-27 15:01:01 +02:00
// Get all links on the page with filtering options
async getPageLinks(options = {}) {
const {
include_internal = true,
include_external = true,
domain_filter = null,
max_results = 100
} = options;
const links = Array.from(document.querySelectorAll('a[href]'));
const currentDomain = this.extractDomain(window.location.href);
const results = [];
for (const link of links) {
if (results.length >= max_results) break;
const href = link.href;
const linkDomain = this.extractDomain(href);
const isInternal = this.isSameDomain(currentDomain, linkDomain);
// Apply internal/external filter
if (!include_internal && isInternal) continue;
if (!include_external && !isInternal) continue;
// Apply domain filter
if (domain_filter && !linkDomain.includes(domain_filter)) continue;
const linkText = link.textContent?.trim() || '';
const linkTitle = link.title || '';
results.push({
url: href,
text: linkText,
title: linkTitle,
type: isInternal ? 'internal' : 'external',
domain: linkDomain
});
}
return {
links: results,
total_found: links.length,
returned: results.length,
current_domain: currentDomain
};
}
// Check if two domains are the same (handles subdomains)
isSameDomain(domain1, domain2) {
if (!domain1 || !domain2) return false;
// Remove www. prefix for comparison
const clean1 = domain1.replace(/^www\./, '');
const clean2 = domain2.replace(/^www\./, '');
return clean1 === clean2;
}
// Extract domain from URL
extractDomain(url) {
try {
return new URL(url).hostname;
} catch {
return '';
}
}
// Scroll page with comprehensive options
async scrollPage(options = {}) {
const {
direction = 'down',
amount = 'medium',
pixels = null,
smooth = true,
element_id = null,
wait_after = 500
} = options;
const startPosition = {
x: window.scrollX,
y: window.scrollY
};
try {
// If element_id is provided, scroll to that element
if (element_id) {
const element = this.getElementById(element_id);
if (!element) {
throw new Error(`Element not found: ${element_id}`);
}
element.scrollIntoView({
behavior: smooth ? 'smooth' : 'instant',
block: 'center',
inline: 'center'
});
await new Promise(resolve => setTimeout(resolve, wait_after));
return {
success: true,
previous_position: startPosition,
new_position: { x: window.scrollX, y: window.scrollY },
method: 'scroll_to_element',
element_id: element_id,
element_name: this.getElementName(element)
};
}
// Calculate scroll amount based on amount parameter
let scrollAmount;
if (amount === 'custom' && pixels) {
scrollAmount = pixels;
} else {
switch (amount) {
case 'small':
scrollAmount = Math.min(200, window.innerHeight * 0.25);
break;
case 'medium':
scrollAmount = Math.min(500, window.innerHeight * 0.5);
break;
case 'large':
scrollAmount = Math.min(800, window.innerHeight * 0.8);
break;
case 'page':
scrollAmount = window.innerHeight * 0.9; // Slightly less than full page for overlap
break;
default:
scrollAmount = Math.min(500, window.innerHeight * 0.5);
}
}
// Calculate scroll direction
let scrollX = 0;
let scrollY = 0;
switch (direction) {
case 'up':
scrollY = -scrollAmount;
break;
case 'down':
scrollY = scrollAmount;
break;
case 'left':
scrollX = -scrollAmount;
break;
case 'right':
scrollX = scrollAmount;
break;
case 'top':
// Scroll to top of page
if (smooth) {
window.scrollTo({ top: 0, left: window.scrollX, behavior: 'smooth' });
} else {
window.scrollTo(window.scrollX, 0);
}
await new Promise(resolve => setTimeout(resolve, wait_after));
return {
success: true,
previous_position: startPosition,
new_position: { x: window.scrollX, y: window.scrollY },
direction: direction,
method: 'scroll_to_top'
};
case 'bottom':
// Scroll to bottom of page
const maxY = Math.max(
document.body.scrollHeight,
document.documentElement.scrollHeight
) - window.innerHeight;
if (smooth) {
window.scrollTo({ top: maxY, left: window.scrollX, behavior: 'smooth' });
} else {
window.scrollTo(window.scrollX, maxY);
}
await new Promise(resolve => setTimeout(resolve, wait_after));
return {
success: true,
previous_position: startPosition,
new_position: { x: window.scrollX, y: window.scrollY },
direction: direction,
method: 'scroll_to_bottom'
};
default:
throw new Error(`Unknown scroll direction: ${direction}`);
}
// Perform the scroll
if (smooth) {
window.scrollBy({
left: scrollX,
top: scrollY,
behavior: 'smooth'
});
} else {
window.scrollBy(scrollX, scrollY);
}
// Wait for scroll to complete
await new Promise(resolve => setTimeout(resolve, wait_after));
const finalPosition = {
x: window.scrollX,
y: window.scrollY
};
const actualScrolled = {
x: finalPosition.x - startPosition.x,
y: finalPosition.y - startPosition.y
};
return {
success: true,
previous_position: startPosition,
new_position: finalPosition,
direction: direction,
amount: amount,
requested_pixels: scrollAmount,
actual_scrolled: actualScrolled,
total_distance: Math.sqrt(actualScrolled.x ** 2 + actualScrolled.y ** 2),
smooth: smooth,
wait_after: wait_after
};
} catch (error) {
return {
success: false,
error: error.message,
previous_position: startPosition,
new_position: { x: window.scrollX, y: window.scrollY },
direction: direction,
amount: amount
};
}
}
2025-06-25 19:07:09 +02:00
}
// Initialize the automation system
const browserAutomation = new BrowserAutomation();