Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions apps/site/docs/en/api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ function aiInput(
cacheable?: boolean;
autoDismissKeyboard?: boolean;
mode?: 'replace' | 'clear' | 'typeOnly';
caret?: 'start' | 'end';
},
): Promise<void>;

Expand Down Expand Up @@ -320,6 +321,23 @@ function aiInput(
- `'replace'`: Clear the input field first, then input the text.
- `'typeOnly'`: Type the value directly without clearing the field first.
- `'clear'`: Clear the input field without entering new text.
- `caret?: 'start' | 'end'` - Web only. When `mode` is `'typeOnly'`, Midscene makes a best-effort attempt to move the caret to the start or the end of the existing content before typing. Omit this option to keep Midscene's default focus-and-type behavior, which does not control the caret position.

:::note Web caret behavior

`mode: 'append'` is kept only as a backward-compatible alias for `typeOnly`; it does not mean the text will be appended at the end. To explicitly request append-at-end behavior on Web, use:

```typescript
await agent.aiInput('The search input box', {
value: 'Hello World',
mode: 'typeOnly',
caret: 'end',
});
```

Because Web pages use many kinds of input implementations, such as rich-text editors, iframes, and shadow DOM, caret movement and input clearing are best-effort and are not guaranteed to work for every input field.

:::

**Backward-compatible usage (deprecated but still supported):**
- `value: string | number` - The text content to input.
Expand Down
18 changes: 18 additions & 0 deletions apps/site/docs/zh/api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ function aiInput(
cacheable?: boolean;
autoDismissKeyboard?: boolean;
mode?: 'replace' | 'clear' | 'typeOnly';
caret?: 'start' | 'end';
},
): Promise<void>;

Expand Down Expand Up @@ -313,6 +314,23 @@ function aiInput(
- `'replace'`: 先清空输入框,然后输入文本。
- `'typeOnly'`: 直接输入文本,不会先清空输入框。
- `'clear'`: 清空输入框,不会输入新的文本。
- `caret?: 'start' | 'end'` - 仅 Web 平台生效。当 `mode` 为 `'typeOnly'` 时,输入前会尝试移动光标。不传该参数时,Midscene 只执行默认的聚焦和输入流程,不主动控制光标位置。

:::note Web 光标行为

`mode: 'append'` 只作为 `typeOnly` 的向后兼容别名保留,并不表示会将文本追加到末尾。如果需要在 Web 平台明确请求“追加到末尾”,请使用:

```typescript
await agent.aiInput('搜索框', {
value: 'Hello World',
mode: 'typeOnly',
caret: 'end',
});
```

受限于 Web 页面中复杂的输入框类型(富文本编辑器、iframe、shadow DOM 等),移动光标或清空输入框内容的能力无法对所有的输入框百分百生效。

:::

**兼容用法**(已过时,但仍然支持):
- `value: string | number` - 要输入的文本内容。
Expand Down
48 changes: 21 additions & 27 deletions packages/core/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ import {
} from '@midscene/shared/env';
import { getDebug } from '@midscene/shared/logger';
import { assert, ifInBrowser, uuid } from '@midscene/shared/utils';
import { defineActionSleep } from '../device';
import { type ActionInputCaret, defineActionSleep } from '../device';
import { TaskCache } from './task-cache';
import {
TaskExecutionError,
Expand Down Expand Up @@ -144,6 +144,20 @@ export type AiActOptions = {
abortSignal?: AbortSignal;
};

/**
 * Input modes accepted by `aiInput`.
 *
 * Per the public API docs: `'replace'` clears the field and then types,
 * `'typeOnly'` types without clearing first, and `'clear'` clears the field
 * without typing. `'append'` is documented as a backward-compatible alias for
 * `'typeOnly'`; it does not by itself promise insertion at the end.
 */
type AiInputMode = 'replace' | 'clear' | 'typeOnly' | 'append';
/**
 * Options shared by both `aiInput` call signatures: the locate options plus
 * the input-specific settings below.
 */
type BaseAiInputOption = LocateOption & {
// Platform-sensitive legacy option exposed through core aiInput.
// It mainly affects mobile soft-keyboard flows and is ignored by platforms
// that do not implement keyboard dismissal.
autoDismissKeyboard?: boolean;
// How the value is entered; see AiInputMode.
mode?: AiInputMode;
// Requested caret position ('start' | 'end'). Documented as Web only and
// relevant when `mode` is 'typeOnly'; leave unset to not control the caret.
caret?: ActionInputCaret;
};
/**
 * Options object for the current `aiInput(locatePrompt, opt)` signature,
 * where the text to input is carried inside the options as `value`.
 */
type AiInputOption = BaseAiInputOption & {
value: string | number;
};
/**
 * Options object for the deprecated `aiInput(value, locatePrompt, opt?)`
 * signature, where the value is passed positionally and so is not part of
 * the options.
 */
type LegacyAiInputOption = BaseAiInputOption;

export class Agent<
InterfaceType extends AbstractInterface = AbstractInterface,
> {
Expand Down Expand Up @@ -616,12 +630,7 @@ export class Agent<
}

// New signature, always use locatePrompt as the first param
async aiInput(
locatePrompt: TUserPrompt,
opt: LocateOption & { value: string | number } & {
autoDismissKeyboard?: boolean;
} & { mode?: 'replace' | 'clear' | 'typeOnly' | 'append' },
): Promise<any>;
async aiInput(locatePrompt: TUserPrompt, opt: AiInputOption): Promise<any>;

// Legacy signature - deprecated
/**
Expand All @@ -630,29 +639,18 @@ export class Agent<
async aiInput(
value: string | number,
locatePrompt: TUserPrompt,
opt?: LocateOption & { autoDismissKeyboard?: boolean } & {
mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
}, // AndroidDeviceInputOpt &
opt?: LegacyAiInputOption,
): Promise<any>;

// Implementation
async aiInput(
locatePromptOrValue: TUserPrompt | string | number,
locatePromptOrOpt:
| TUserPrompt
| (LocateOption & { value: string | number } & {
autoDismissKeyboard?: boolean;
} & { mode?: 'replace' | 'clear' | 'typeOnly' | 'append' }) // AndroidDeviceInputOpt &
| undefined,
optOrUndefined?: LocateOption, // AndroidDeviceInputOpt &
locatePromptOrOpt: TUserPrompt | AiInputOption | undefined,
optOrUndefined?: LegacyAiInputOption,
) {
let value: string | number;
let locatePrompt: TUserPrompt;
let opt:
| (LocateOption & { value: string | number } & {
autoDismissKeyboard?: boolean;
} & { mode?: 'replace' | 'clear' | 'typeOnly' | 'append' }) // AndroidDeviceInputOpt &
| undefined;
let opt: AiInputOption | undefined;

// Check if using new signature (first param is locatePrompt, second has value)
if (
Expand All @@ -662,11 +660,7 @@ export class Agent<
) {
// New signature: aiInput(locatePrompt, opt)
locatePrompt = locatePromptOrValue as TUserPrompt;
const optWithValue = locatePromptOrOpt as LocateOption & {
// AndroidDeviceInputOpt &
value: string | number;
autoDismissKeyboard?: boolean;
};
const optWithValue = locatePromptOrOpt as AiInputOption;
value = optWithValue.value;
opt = optWithValue;
} else {
Expand Down
29 changes: 26 additions & 3 deletions packages/core/src/device/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export interface KeyboardInputPrimitives {
target?: unknown;
replace?: boolean;
focusOnly?: boolean;
caret?: ActionInputCaret;
},
): Promise<void>;
clearInput(target?: unknown): Promise<void>;
Expand Down Expand Up @@ -408,21 +409,41 @@ export const actionInputParamSchema = z.object({
'If true, the keyboard will be dismissed after the input is completed. Do not set it unless the user asks you to do so.',
),
});
/**
 * Schema for the optional `caret` parameter of the Input action:
 * either `'start'` or `'end'`, or omitted.
 */
const actionInputCaretParamSchema = z
.enum(['start', 'end'])
.optional()
.describe(
'Web only. In typeOnly mode, best-effort move the caret before typing. Use only when the user explicitly asks to insert at the start or append at the end.',
);
/**
 * Input action parameter schema with the additional `caret` field.
 * `defineActionInput` selects this schema instead of the base
 * `actionInputParamSchema` when its `options.caret` flag is truthy.
 */
const webActionInputParamSchema = actionInputParamSchema.extend({
caret: actionInputCaretParamSchema,
});
/** Caret position that can be requested before typing: start or end. */
export type ActionInputCaret = 'start' | 'end';
/** Parameters received by the Input action created by `defineActionInput`. */
export type ActionInputParam = {
// Text to input.
value: string;
// Located target element, if any.
locate?: LocateResultElement;
// Input mode; 'append' is documented as a legacy alias for 'typeOnly'.
mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
// Whether to dismiss the keyboard after the input is completed.
autoDismissKeyboard?: boolean;
// Optional caret position, passed on in the keyboard primitive's options.
caret?: ActionInputCaret;
};

/** Optional switches for `defineActionInput`. */
export type DefineActionInputOptions = {
// When truthy, the Input action uses the parameter schema that includes the
// `caret` field; otherwise the base input schema is used.
caret?: boolean;
};

export const defineActionInput = (
keyboard: KeyboardInputPrimitives,
options: DefineActionInputOptions = {},
): DeviceAction<ActionInputParam> => {
return defineAction<typeof actionInputParamSchema, ActionInputParam>({
const paramSchema = options.caret
? webActionInputParamSchema
: actionInputParamSchema;

return defineAction<typeof paramSchema, ActionInputParam>({
name: 'Input',
description: 'Input the value into the element',
interfaceAlias: 'aiInput',
paramSchema: actionInputParamSchema,
paramSchema,
sample: {
value: 'test@example.com',
locate: { prompt: 'the email input field' },
Expand All @@ -446,6 +467,7 @@ export const defineActionInput = (
target: param.locate,
replace: param.mode !== 'typeOnly',
autoDismissKeyboard: param.autoDismissKeyboard,
caret: param.caret,
});
},
});
Expand Down Expand Up @@ -952,6 +974,7 @@ export interface InputPrimitiveActionOptions {
sleep?: (timeMs: number) => Promise<void>;
includeSwipe?: boolean;
includePinch?: boolean;
inputCaret?: boolean;
systemActions?: SystemInputActionOptions;
}

Expand Down Expand Up @@ -997,7 +1020,7 @@ export function defineActionsFromInputPrimitives(

if (keyboard) {
actions.push(
defineActionInput(keyboard),
defineActionInput(keyboard, { caret: options.inputCaret }),
defineActionClearInput(keyboard.clearInput),
defineActionKeyboardPress(keyboard.keyboardPress),
defineActionCursorMove({ keyboard, sleep: options.sleep }),
Expand Down
Loading
Loading