Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/chrome-extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
},
"devDependencies": {
"@rsbuild/core": "^1.6.15",
"rimraf": "~3.0.2",
"rimraf": "~5.0.10",
"@rsbuild/plugin-less": "^1.5.0",
"@rsbuild/plugin-node-polyfill": "1.4.2",
"@rsbuild/plugin-react": "^1.4.1",
Expand Down
1 change: 1 addition & 0 deletions apps/site/docs/en/model-config.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ If you configure a dedicated Insight or Planning model, model-related `MIDSCENE_
| `MIDSCENE_MODEL_EXTRA_BODY_JSON` | JSON blob merged into each chat completion request body. Unlike `MIDSCENE_MODEL_INIT_CONFIG_JSON` (which configures the SDK client), this is spread into every `completion.create()` call sent to the model, e.g. enabling thinking mode in vLLM: `'{"chat_template_kwargs":{"enable_thinking":true}}'` |
| `MIDSCENE_RUN_DIR` | Directory for run artifacts such as reports and logs. Defaults to `midscene_run` in the current working directory; accepts absolute or relative paths |
| `MIDSCENE_PREFERRED_LANGUAGE` | Optional. Preferred response language. Defaults to `Chinese` if timezone is GMT+8, otherwise `English` |
| `MIDSCENE_PLAYGROUND_HOST` | Host the Playground server listens on. Defaults to `0.0.0.0` (all network interfaces) so local device runtimes can reach it. Set to `127.0.0.1` for local-only access |

> Note: Control replanning behavior with the agent option `replanningCycleLimit` (defaults to 20, or 40 for `vlm-ui-tars`), not with environment variables.

Expand Down
1 change: 1 addition & 0 deletions apps/site/docs/zh/model-config.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ export MIDSCENE_MODEL_FAMILY="gpt-5"
| `MIDSCENE_MODEL_EXTRA_BODY_JSON` | 合并到每次 chat completion 请求体中的 JSON。与 `MIDSCENE_MODEL_INIT_CONFIG_JSON`(配置 SDK 客户端)不同,此参数会展开到每次发送到模型的 `completion.create()` 调用体中,例如在 vLLM 中启用思考模式:`'{"chat_template_kwargs":{"enable_thinking":true}}'` |
| `MIDSCENE_RUN_DIR` | 运行产物目录,默认值为当前工作目录下的 `midscene_run`,支持设置绝对路径或相对路径 |
| `MIDSCENE_PREFERRED_LANGUAGE` | 可选,模型响应的语言;如果当前系统时区是 GMT+8 则默认是 `Chinese`,否则是 `English` |
| `MIDSCENE_PLAYGROUND_HOST` | Playground 服务器监听地址,默认 `0.0.0.0`,方便本地设备运行时访问。如需仅允许本机访问,可设为 `127.0.0.1` |

> 提示:通过 Agent 的 `replanningCycleLimit` 入参控制重规划次数(默认 20,`vlm-ui-tars` 为 40),不再使用环境变量。

Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"format": "pretty-quick --staged",
"commit": "cz",
"check-spell": "npx cspell",
"clean": "rm -rf .nx/cache packages/*/dist packages/*/node_modules/.cache apps/*/dist apps/*/node_modules/.cache packages/playground/static packages/ios/static"
"clean": "rimraf --glob .nx/cache packages/playground/static packages/ios/static \"packages/*/dist\" \"packages/*/node_modules/.cache\" \"apps/*/dist\" \"apps/*/node_modules/.cache\""
},
"simple-git-hooks": {
"pre-commit": "npx lint-staged",
Expand Down Expand Up @@ -67,6 +67,7 @@
"prettier": "^3.6.2",
"pretty-quick": "3.1.3",
"semver": "7.5.2",
"rimraf": "~5.0.10",
"simple-git-hooks": "^2.13.1"
}
}
6 changes: 4 additions & 2 deletions packages/core/src/ai-model/llm-planning.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import type { ChatCompletionMessageParam } from 'openai/resources/index';
import {
buildYamlFlowFromPlans,
fillBboxParam,
findActionInActionSpaceOrThrow,
findAllMidsceneLocatorField,
} from '../common';
import type { ConversationHistory } from './conversation-history';
Expand Down Expand Up @@ -301,8 +302,9 @@ export async function plan(

actions.forEach((action) => {
const type = action.type;
const actionInActionSpace = opts.actionSpace.find(
(action) => action.name === type,
const actionInActionSpace = findActionInActionSpaceOrThrow(
type,
opts.actionSpace,
);

debug('actionInActionSpace matched', actionInActionSpace);
Expand Down
22 changes: 15 additions & 7 deletions packages/core/src/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,20 @@ export async function markupImageForLLM(
return imagePayload;
}

/**
 * Looks up the entry of `actionSpace` whose `name` equals `planType`.
 *
 * @param planType - Action type name to search for.
 * @param actionSpace - Candidate actions; the first entry with a matching
 *   `name` is returned.
 * @returns The matching action.
 * @throws Error when no entry matches. The message lists every name present
 *   in `actionSpace`, or `(none)` when that list is empty.
 */
export function findActionInActionSpaceOrThrow(
  planType: string,
  actionSpace: DeviceAction<any>[],
): DeviceAction<any> {
  for (const candidate of actionSpace) {
    if (candidate.name === planType) {
      return candidate;
    }
  }

  // Nothing matched: report what is available so the mismatch is easy to spot.
  const knownNames = actionSpace.map((candidate) => candidate.name);
  const listing = knownNames.join(', ') || '(none)';
  throw new Error(
    `Action type '${planType}' is not in the current action space. Available actions: ${listing}`,
  );
}

export function buildYamlFlowFromPlans(
plans: PlanningAction[],
actionSpace: DeviceAction<any>[],
Expand All @@ -468,13 +482,7 @@ export function buildYamlFlowFromPlans(
for (const plan of plans) {
const verb = plan.type;

const action = actionSpace.find((action) => action.name === verb);
if (!action) {
console.warn(
`Cannot convert action ${verb} to yaml flow. Will ignore it.`,
);
continue;
}
const action = findActionInActionSpaceOrThrow(verb, actionSpace);

const flowKey = action.interfaceAlias || verb;
const flowParam = action.paramSchema
Expand Down
18 changes: 3 additions & 15 deletions packages/core/tests/ai/llm-planning/input.test.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,13 @@
import { ConversationHistory, plan } from '@/ai-model';
import type { DeviceAction } from '@/types';
import { globalModelConfigManager } from '@midscene/shared/env';
import { beforeAll, describe, expect, it, vi } from 'vitest';
import { z } from 'zod';
import { getContextFromFixture } from '../../evaluation';
import { mockActionSpace } from 'tests/common';
import { getContextFromFixture } from 'tests/evaluation';
import { describe, expect, it, vi } from 'vitest';
vi.setConfig({
testTimeout: 180 * 1000,
hookTimeout: 30 * 1000,
});

const mockActionSpace: DeviceAction[] = [
{
name: 'Input',
description: 'Replace the input field with a new value',
paramSchema: z.object({
value: z.string(),
}),
call: () => {},
},
];

const defaultModelConfig = globalModelConfigManager.getModelConfig('default');

describe('automation - planning input', () => {
Expand Down
9 changes: 9 additions & 0 deletions packages/core/tests/unit-test/llm-planning.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ describe('llm planning - doubao', () => {
});

describe('llm planning - build yaml flow', () => {
// buildYamlFlowFromPlans must throw when a plan's type has no matching
// entry in the provided action space (here only 'Tap' is available).
it('throws when planned action is not in actionSpace', () => {
expect(() =>
buildYamlFlowFromPlans(
[{ type: 'NonExistentAction', param: {}, thought: '' }],
[{ name: 'Tap', call: async () => {} }],
),
).toThrow(/not in the current action space/);
});

it('build yaml flow', () => {
const flow = buildYamlFlowFromPlans(
[
Expand Down
7 changes: 4 additions & 3 deletions packages/playground/src/launcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { spawn } from 'node:child_process';
import type { Agent } from '@midscene/core/agent';
import { PLAYGROUND_SERVER_PORT } from '@midscene/shared/constants';
import cors, { type CorsOptions } from 'cors';
import PlaygroundServer from './server';
import PlaygroundServer, { resolvePlaygroundBrowserHost } from './server';
import type { AgentFactory } from './types';

export interface LaunchPlaygroundOptions {
Expand Down Expand Up @@ -201,7 +201,8 @@ function createPlaygroundLauncher(agentOrFactory: LaunchableAgentSource) {
console.log(`✅ Playground server started on port ${port}`);
}

const url = `http://127.0.0.1:${port}`;
const host = resolvePlaygroundBrowserHost();
const url = `http://${host}:${port}`;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Bracket IPv6 playground hosts before building URLs

When MIDSCENE_PLAYGROUND_HOST is set to an IPv6 literal such as ::1, the server can listen on that host, but this interpolation produces http://::1:<port>, which is not a valid HTTP URL. Since the new env var accepts listen hosts and this helper already handles the IPv6 wildcard ::, format IPv6 browser hosts with brackets (for example http://[::1]:5800) before passing the URL to openInBrowser.

Useful? React with 👍 / 👎.


// Open browser if requested
if (openBrowser) {
Expand All @@ -211,7 +212,7 @@ function createPlaygroundLauncher(agentOrFactory: LaunchableAgentSource) {
return {
server: launchedServer,
port,
host: '127.0.0.1',
host,
close: async () => {
if (verbose) {
console.log('🛑 Shutting down Midscene Playground...');
Expand Down
15 changes: 14 additions & 1 deletion packages/playground/src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,18 @@ import 'dotenv/config';

const defaultPort = PLAYGROUND_SERVER_PORT;

/**
 * Resolves the host the playground server should listen on.
 *
 * Reads `MIDSCENE_PLAYGROUND_HOST` from the environment and trims surrounding
 * whitespace. Falls back to `0.0.0.0` when the variable is unset or blank.
 *
 * @returns The trimmed configured host, or `0.0.0.0`.
 */
export function resolvePlaygroundListenHost(): string {
  const fallbackHost = '0.0.0.0';
  const configured = process.env.MIDSCENE_PLAYGROUND_HOST;
  if (configured == null) {
    return fallbackHost;
  }

  const trimmed = configured.trim();
  // A whitespace-only value is treated the same as an unset variable.
  return trimmed === '' ? fallbackHost : trimmed;
}

/**
 * Resolves the host to use when pointing a browser at the playground.
 *
 * The wildcard listen addresses `0.0.0.0` and `::` are replaced with
 * `127.0.0.1`; any other listen host is returned unchanged.
 *
 * NOTE(review): the value is returned as-is, so an IPv6 literal such as `::1`
 * comes back without brackets. Callers that interpolate it into a URL must
 * wrap it themselves (e.g. `http://[::1]:5800`).
 *
 * @returns A host string derived from the resolved listen host.
 */
export function resolvePlaygroundBrowserHost(): string {
  const resolvedHost = resolvePlaygroundListenHost();

  const isWildcard = resolvedHost === '0.0.0.0' || resolvedHost === '::';
  if (isWildcard) {
    return '127.0.0.1';
  }

  return resolvedHost;
}

function serializeAiConfigSignature(aiConfig: Record<string, unknown>): string {
return JSON.stringify(
Object.entries(aiConfig).sort(([leftKey], [rightKey]) =>
Expand Down Expand Up @@ -1832,7 +1844,8 @@ class PlaygroundServer {

return new Promise((resolve) => {
const serverPort = this.port ?? defaultPort;
this.server = this._app.listen(serverPort, '0.0.0.0', () => {
const listenHost = resolvePlaygroundListenHost();
this.server = this._app.listen(serverPort, listenHost, () => {
resolve(this);
});
});
Expand Down
24 changes: 24 additions & 0 deletions packages/playground/tests/unit/launcher.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,37 @@ describe('playground launcher', () => {
});

expect(result.port).toBe(5921);
expect(result.host).toBe('127.0.0.1');
expect(result.server.id).toBe('launcher-instance-id');
expect(result.server.staticPath).toBe(staticPath);

await result.close();
expect(agent.destroy).toHaveBeenCalledTimes(1);
});

// Verifies that the launch result reports the host configured through the
// MIDSCENE_PLAYGROUND_HOST environment variable.
it('should return MIDSCENE_PLAYGROUND_HOST when configured', async () => {
// Remember the current value so the environment can be put back afterwards.
const originalHost = process.env.MIDSCENE_PLAYGROUND_HOST;
process.env.MIDSCENE_PLAYGROUND_HOST = 'localhost';

try {
const result = await playgroundForAgent(createMockAgent()).launch({
port: 5924,
openBrowser: false,
verbose: false,
staticPath,
});

expect(result.host).toBe('localhost');
await result.close();
} finally {
// Runs even if the launch or the assertion fails: remove the variable when
// it was originally unset, otherwise restore the previous value.
if (originalHost === undefined) {
Reflect.deleteProperty(process.env, 'MIDSCENE_PLAYGROUND_HOST');
} else {
process.env.MIDSCENE_PLAYGROUND_HOST = originalHost;
}
}
});

it('should launch from agent factory and allow server configuration', async () => {
const agentFactory = vi.fn(async () => createMockAgent());
let configuredServer: any;
Expand Down
2 changes: 1 addition & 1 deletion packages/shared/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@
"@types/node": "^18.0.0",
"@ui-tars/shared": "1.2.0",
"openai": "6.3.0",
"rimraf": "~3.0.2",
"rimraf": "~5.0.10",
"typescript": "^5.8.3",
"vitest": "3.0.5"
},
Expand Down
2 changes: 1 addition & 1 deletion packages/visualizer/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"react": "18.3.1",
"react-dom": "18.3.1",
"react-resizable-panels": "2.0.22",
"rimraf": "~3.0.2",
"rimraf": "~5.0.10",
"tsx": "^4.19.2",
"typescript": "^5.8.3",
"vitest": "3.0.5",
Expand Down
27 changes: 19 additions & 8 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading