diff --git a/packages/shared/src/constants/index.ts b/packages/shared/src/constants/index.ts index 594450ee50..2207ea177b 100644 --- a/packages/shared/src/constants/index.ts +++ b/packages/shared/src/constants/index.ts @@ -36,4 +36,6 @@ export const DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT = 5000; export const DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT = 2000; export const DEFAULT_WAIT_FOR_NETWORK_IDLE_CONCURRENCY = 2; +export const SUB_XPATH_SEPARATOR = '|>>|'; + export { PLAYWRIGHT_EXAMPLE_CODE, YAML_EXAMPLE_CODE } from './example-code'; diff --git a/packages/shared/src/extractor/index.ts b/packages/shared/src/extractor/index.ts index 89037e9903..432ac459fd 100644 --- a/packages/shared/src/extractor/index.ts +++ b/packages/shared/src/extractor/index.ts @@ -40,9 +40,13 @@ export { getXpathsById, getNodeInfoByXpath, getElementInfoByXpath, + getElementInfoByXpathInCurrentFrame, + getIframeRectByXpath, getElementXpath, } from './locator'; +export type { CrossOriginIframeSignal } from './locator'; + export { generateElementByPoint, generateElementByRect } from './dom-util'; export { isNotContainerElement } from './dom-util'; diff --git a/packages/shared/src/extractor/locator.ts b/packages/shared/src/extractor/locator.ts index 24f1f95c82..96d7967bf5 100644 --- a/packages/shared/src/extractor/locator.ts +++ b/packages/shared/src/extractor/locator.ts @@ -9,8 +9,14 @@ import { } from './util'; import { collectElementInfo } from './web-extractor'; -/** Separator for compound XPath across iframes (e.g. "iframePath|>>|/html/body/div") */ -const SUB_XPATH_SEPARATOR = '|>>|'; +import { SUB_XPATH_SEPARATOR } from '../constants/index'; + +/** Returned by getXpathsByPoint when it hits a cross-origin iframe boundary */ +export interface CrossOriginIframeSignal { + __crossOriginIframe: true; + iframeXpath: string; + translatedPoint: { left: number; top: number }; +} /** Parse the non-standard `zoom` CSS property (Chromium-only) with fallback to 1 */ function parseCSSZoom(style: CSSStyleDeclaration): number { @@ -318,6 +324,25 @@ export function getXpathsByPoint( const tag = element.tagName.toLowerCase(); if (tag === 'iframe' || tag === 'frame') { + const buildCrossOriginSignal = (): CrossOriginIframeSignal => { + const localPoint = translatePointToIframeCoordinates( + { left, top }, + element, + currentWindow, + ); + const currentIframeXpath = getElementXpath( + element, + isOrderSensitive, + false, + true, + ); + return { + __crossOriginIframe: true, + iframeXpath: xpathPrefix + currentIframeXpath, + translatedPoint: { left: localPoint.left, top: localPoint.top }, + }; + }; + try { const contentWindow = (element as HTMLIFrameElement).contentWindow; const contentDocument = (element as HTMLIFrameElement).contentDocument; @@ -341,11 +366,15 @@ export function getXpathsByPoint( top = localPoint.top; continue; } + + // contentDocument is null — cross-origin iframe (browser returns null instead of throwing) + return buildCrossOriginSignal() as any; } catch (error) { logger( '[midscene:locator] iframe penetration failed (cross-origin?):', error, ); + return buildCrossOriginSignal() as any; } } @@ -467,3 +496,88 @@ export function getElementInfoByXpath(xpath: string): ElementInfo | null { return collectElementInfo(node, targetWin, targetDoc, 1, iframeOffset, true); } + +export function getElementInfoByXpathInCurrentFrame( + xpath: string, + iframeOffset: { left: number; top: number }, +): ElementInfo | null { + const currentDocument: Document = + typeof document !== 'undefined' ? document : (undefined as any); + const currentWindow: Window = + typeof window !== 'undefined' ? window : (undefined as any); + + const xpathResult = currentDocument.evaluate( + xpath, + currentDocument, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + null, + ); + + if (xpathResult.snapshotLength !== 1) { + logger( + `[midscene:locator] XPath "${xpath}" matched ${xpathResult.snapshotLength} elements in current frame (expected 1), discarding.`, + ); + return null; + } + + const node = xpathResult.snapshotItem(0); + if (!node) return null; + + const targetWin = currentWindow as typeof globalThis.window; + const targetDoc = currentDocument as typeof globalThis.document; + if (node instanceof (targetWin as any).HTMLElement) { + const rect = getRect(node, 1, targetWin); + const isVisible = isElementPartiallyInViewport( + rect, + targetWin, + targetDoc, + 1, + ); + if (!isVisible) { + (node as HTMLElement).scrollIntoView({ + behavior: 'instant', + block: 'center', + }); + } + } + + return collectElementInfo(node, targetWin, targetDoc, 1, iframeOffset, true); +} + +export function getIframeRectByXpath(xpath: string): { + left: number; + top: number; + borderLeft: number; + borderTop: number; + zoom: number; +} | null { + const currentDocument: Document = + typeof document !== 'undefined' ? document : (undefined as any); + const currentWindow: Window = + typeof window !== 'undefined' ? window : (undefined as any); + + const xpathResult = currentDocument.evaluate( + xpath, + currentDocument, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + null, + ); + + if (xpathResult.snapshotLength !== 1) return null; + const el = xpathResult.snapshotItem(0); + if (!el || !(el as Element).getBoundingClientRect) return null; + + const rect = (el as Element).getBoundingClientRect(); + const style = currentWindow.getComputedStyle(el as Element); + const zoom = parseCSSZoom(style); + + return { + left: rect.left, + top: rect.top, + borderLeft: Number.parseFloat(style.borderLeftWidth) || 0, + borderTop: Number.parseFloat(style.borderTopWidth) || 0, + zoom, + }; +} diff --git a/packages/web-integration/src/puppeteer/base-page.ts b/packages/web-integration/src/puppeteer/base-page.ts index 71c8f9abb3..1a84abbacc 100644 --- a/packages/web-integration/src/puppeteer/base-page.ts +++ b/packages/web-integration/src/puppeteer/base-page.ts @@ -17,6 +17,7 @@ import { DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT, DEFAULT_WAIT_FOR_NETWORK_IDLE_CONCURRENCY, DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT, + SUB_XPATH_SEPARATOR, } from '@midscene/shared/constants'; import type { ElementInfo } from '@midscene/shared/extractor'; import { treeToList } from '@midscene/shared/extractor'; @@ -27,8 +28,16 @@ import { getExtraReturnLogic, } from '@midscene/shared/node'; import { assert } from '@midscene/shared/utils'; -import type { Page as PlaywrightPage } from 'playwright'; -import type { CDPSession, Protocol, Page as PuppeteerPage } from 'puppeteer'; +import type { + Frame as PlaywrightFrame, + Page as PlaywrightPage, +} from 'playwright'; +import type { + CDPSession, + Protocol, + Frame as PuppeteerFrame, + Page as PuppeteerPage, +} from 'puppeteer'; import { type CacheFeatureOptions, type WebElementCacheFeature, @@ -248,19 +257,233 @@ export class Page< return treeToList(tree); } - private async getXpathsByPoint(point: Point, isOrderSensitive: boolean) { + private async getXpathsByPoint( + point: Point, + isOrderSensitive: boolean, + ): Promise { const elementInfosScriptContent = getElementInfosScriptContent(); - return this.evaluateJavaScript( + const result = await this.evaluateJavaScript( `${elementInfosScriptContent}midscene_element_inspector.getXpathsByPoint({left: ${point.left}, top: ${point.top}}, ${isOrderSensitive})`, ); + + if ( + result && + typeof result === 'object' && + '__crossOriginIframe' in result + ) { + return this.handleCrossOriginXpathsByPoint( + result as { + __crossOriginIframe: true; + iframeXpath: string; + translatedPoint: { left: number; top: number }; + }, + isOrderSensitive, + ); + } + + return result; + } + + private async handleCrossOriginXpathsByPoint( + signal: { + __crossOriginIframe: true; + iframeXpath: string; + translatedPoint: { left: number; top: number }; + }, + isOrderSensitive: boolean, + depth = 0, + ): Promise { + const MAX_CROSS_ORIGIN_DEPTH = 5; + if (depth >= MAX_CROSS_ORIGIN_DEPTH) { + debugPage( + 'Cross-origin iframe recursion depth exceeded (%d), stopping', + depth, + ); + return [signal.iframeXpath]; + } + + debugPage( + 'Cross-origin iframe detected, iframe xpath: %s', + signal.iframeXpath, + ); + + const frame = await this.findFrameByXpath( + this.underlyingPage, + signal.iframeXpath, + ); + if (!frame) { + debugPage('Could not find Frame for cross-origin iframe'); + return [signal.iframeXpath]; + } + + const elementInfosScriptContent = getElementInfosScriptContent(); + const innerResult = await this.evaluateInContext( + frame, + `${elementInfosScriptContent}midscene_element_inspector.getXpathsByPoint({left: ${signal.translatedPoint.left}, top: ${signal.translatedPoint.top}}, ${isOrderSensitive})`, + ); + + if ( + innerResult && + typeof innerResult === 'object' && + '__crossOriginIframe' in (innerResult as any) + ) { + const innerSignal = innerResult as { + __crossOriginIframe: true; + iframeXpath: string; + translatedPoint: { left: number; top: number }; + }; + innerSignal.iframeXpath = `${signal.iframeXpath}${SUB_XPATH_SEPARATOR}${innerSignal.iframeXpath}`; + return this.handleCrossOriginXpathsByPoint( + innerSignal, + isOrderSensitive, + depth + 1, + ); + } + + if (Array.isArray(innerResult) && innerResult.length > 0) { + return innerResult.map( + (inner: string) => + `${signal.iframeXpath}${SUB_XPATH_SEPARATOR}${inner}`, + ); + } + + return [signal.iframeXpath]; + } + + private async findFrameByXpath( + parentContext: + | PuppeteerPage + | PuppeteerFrame + | PlaywrightPage + | PlaywrightFrame, + iframeXpath: string, + ): Promise { + const parts = iframeXpath + .split(SUB_XPATH_SEPARATOR) + .map((p) => p.trim()) + .filter(Boolean); + + let context = parentContext; + for (const part of parts) { + const frame = await this.findSingleFrameByXpath(context, part); + if (!frame) return null; + context = frame; + } + return context as PuppeteerFrame | PlaywrightFrame; + } + + private async findSingleFrameByXpath( + parentContext: + | PuppeteerPage + | PuppeteerFrame + | PlaywrightPage + | PlaywrightFrame, + xpath: string, + ): Promise { + try { + if (this.interfaceType === 'puppeteer') { + const ctx = parentContext as PuppeteerPage | PuppeteerFrame; + const handles = await ctx.$$(`xpath/${xpath}`); + if (!handles.length) return null; + const handle = handles[0]; + const contentFrame = await handle.contentFrame(); + await handle.dispose(); + return contentFrame; + } + const ctx = parentContext as PlaywrightPage | PlaywrightFrame; + const handle = await ctx.locator(`xpath=${xpath}`).elementHandle(); + if (!handle) return null; + const contentFrame = await handle.contentFrame(); + await handle.dispose(); + return contentFrame; + } catch (error) { + debugPage('findSingleFrameByXpath failed for xpath %s: %s', xpath, error); + return null; + } } private async getElementInfoByXpath(xpath: string) { + const parts = xpath + .split(SUB_XPATH_SEPARATOR) + .map((p) => p.trim()) + .filter(Boolean); + + if (parts.length <= 1) { + const elementInfosScriptContent = getElementInfosScriptContent(); + return this.evaluateJavaScript( + `${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath(${JSON.stringify(xpath)})`, + ); + } + + return this.resolveMultiFrameElementInfo(parts); + } + + private async evaluateInContext( + context: PuppeteerPage | PuppeteerFrame | PlaywrightPage | PlaywrightFrame, + script: string, + ): Promise { + return (context as any).evaluate(script); + } + + private async resolveMultiFrameElementInfo( + xpathParts: string[], + ): Promise { + let currentContext: + | PuppeteerPage + | PuppeteerFrame + | PlaywrightPage + | PlaywrightFrame = this.underlyingPage; + let accumulatedOffset = { left: 0, top: 0 }; + + for (let i = 0; i < xpathParts.length - 1; i++) { + const iframeXpath = xpathParts[i]; + + const elementInfosScriptContent = getElementInfosScriptContent(); + const iframeRect = await this.evaluateInContext( + currentContext, + `${elementInfosScriptContent}midscene_element_inspector.getIframeRectByXpath(${JSON.stringify(iframeXpath)})`, + ); + + if (!iframeRect) { + debugPage( + 'resolveMultiFrameElementInfo: could not find iframe for xpath: %s', + iframeXpath, + ); + return null; + } + + const zoom = iframeRect.zoom || 1; + accumulatedOffset = { + left: + accumulatedOffset.left / zoom + + iframeRect.left + + iframeRect.borderLeft, + top: + accumulatedOffset.top / zoom + iframeRect.top + iframeRect.borderTop, + }; + + const childFrame = await this.findFrameByXpath( + currentContext, + iframeXpath, + ); + if (!childFrame) { + debugPage( + 'resolveMultiFrameElementInfo: could not find Frame for iframe xpath: %s', + iframeXpath, + ); + return null; + } + + currentContext = childFrame; + } + + const innerXpath = xpathParts[xpathParts.length - 1]; const elementInfosScriptContent = getElementInfosScriptContent(); - return this.evaluateJavaScript( - `${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath(${JSON.stringify(xpath)})`, + return this.evaluateInContext( + currentContext, + `${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpathInCurrentFrame(${JSON.stringify(innerXpath)}, ${JSON.stringify(accumulatedOffset)})`, ); }