simstudioai
diff --git a/apps/docs/content/docs/en/tools/browser_use.mdx b/apps/docs/content/docs/en/tools/browser_use.mdx
Lines changed: 25 additions & 5 deletions
diff --git a/apps/docs/content/docs/en/tools/stagehand.mdx b/apps/docs/content/docs/en/tools/stagehand.mdx
Lines changed: 4 additions & 0 deletions
diff --git a/apps/sim/app/api/tools/stagehand/agent/route.ts b/apps/sim/app/api/tools/stagehand/agent/route.ts
Lines changed: 43 additions & 5 deletions
diff --git a/apps/sim/app/api/tools/stagehand/extract/route.ts b/apps/sim/app/api/tools/stagehand/extract/route.ts
Lines changed: 3 additions & 9 deletions
diff --git a/apps/sim/blocks/blocks/browser_use.ts b/apps/sim/blocks/blocks/browser_use.ts
Lines changed: 129 additions & 11 deletions
@@ -42,9 +42,18 @@ Runs a browser automation task using BrowserUse
 | Parameter | Type | Required | Description |
 | --------- | ---- | -------- | ----------- |
 | `task` | string | Yes | What should the browser agent do |
-| `variables` | json | No | Optional variables to use as secrets \(format: \{key: value\}\) |
-| `save_browser_data` | boolean | No | Whether to save browser data |
-| `model` | string | No | LLM model to use \(default: gpt-4o\) |
+| `startUrl` | string | No | Initial page URL to start the agent on \(reduces navigation steps\) |
+| `variables` | json | No | Optional secrets injected into the task \(format: \{key: value\}\) |
+| `allowedDomains` | string | No | Comma-separated list of domains the agent is allowed to visit |
+| `maxSteps` | number | No | Maximum number of steps the agent may take \(default 100, max 10000\) |
+| `flashMode` | boolean | No | Enable flash mode \(faster, less careful navigation\) |
+| `thinking` | boolean | No | Enable extended reasoning mode |
+| `vision` | string | No | Vision capability: "true", "false", or "auto" |
+| `systemPromptExtension` | string | No | Optional text appended to the agent system prompt \(max 2000 chars\) |
+| `structuredOutput` | string | No | Stringified JSON schema for the structured output |
+| `highlightElements` | boolean | No | Highlight interactive elements on the page \(default true\) |
+| `metadata` | json | No | Custom key-value metadata \(up to 10 pairs\) for tracking |
+| `model` | string | No | LLM model identifier \(e.g. browser-use-2.0\) |
 | `apiKey` | string | Yes | API key for BrowserUse API |
 | `profile_id` | string | No | Browser profile ID for persistent sessions \(cookies, login state\) |
 
@@ -54,7 +63,18 @@ Runs a browser automation task using BrowserUse
 | --------- | ---- | ----------- |
 | `id` | string | Task execution identifier |
 | `success` | boolean | Task completion status |
-| `output` | json | Task output data |
-| `steps` | json | Execution steps taken |
+| `output` | json | Final task output \(string or structured\) |
+| `steps` | array | Steps the agent executed \(number, memory, evaluationPreviousGoal, nextGoal, url, screenshotUrl, actions, duration\) |
+|   ↳ `number` | number | Sequential step number |
+|   ↳ `memory` | string | Agent memory at this step |
+|   ↳ `evaluationPreviousGoal` | string | Evaluation of previous goal completion |
+|   ↳ `nextGoal` | string | Goal for the next step |
+|   ↳ `url` | string | Current URL of the browser |
+|   ↳ `screenshotUrl` | string | Optional screenshot URL |
+|   ↳ `actions` | array | Stringified JSON actions performed |
+|   ↳ `duration` | number | Step duration in seconds |
+| `liveUrl` | string | Embeddable live browser session URL \(active during execution\) |
+| `shareUrl` | string | Public shareable URL for the recorded session \(post-run\) |
+| `sessionId` | string | Browser Use session identifier |
 
 
@@ -72,6 +72,8 @@ Run an autonomous web agent to complete tasks and extract structured data
 | `provider` | string | No | AI provider to use: openai or anthropic |
 | `apiKey` | string | Yes | API key for the selected provider |
 | `outputSchema` | json | No | Optional JSON schema defining the structure of data the agent should return |
+| `mode` | string | No | Agent tool mode: dom \(default\), hybrid, or cua |
+| `maxSteps` | number | No | Maximum agent steps \(default 20, max 200\) |
 
 #### Output
 
@@ -92,5 +94,7 @@ Run an autonomous web agent to complete tasks and extract structured data
 |     ↳ `timestamp` | number | Unix timestamp when the action was performed |
 |     ↳ `timeMs` | number | Time in milliseconds \(for wait actions\) |
 | `structuredOutput` | object | Extracted data matching the provided output schema |
+| `liveViewUrl` | string | Embeddable Browserbase live view URL \(active only while the session is running\) |
+| `sessionId` | string | Browserbase session identifier |
 
 
@@ -22,6 +22,8 @@ const requestSchema = z.object({
   variables: z.any(),
   provider: z.enum(['openai', 'anthropic']).optional().default('openai'),
   apiKey: z.string(),
+  mode: z.enum(['dom', 'hybrid', 'cua']).optional().default('dom'),
+  maxSteps: z.number().int().min(1).max(200).optional().default(20),
 })
 
 /**
@@ -121,7 +123,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
     }
 
     const params = validationResult.data
-    const { task, startUrl: rawStartUrl, outputSchema, provider, apiKey } = params
+    const { task, startUrl: rawStartUrl, outputSchema, provider, apiKey, mode, maxSteps } = params
     const variablesObject = processVariables(params.variables)
 
     const startUrl = normalizeUrl(rawStartUrl)
@@ -165,8 +167,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
       return NextResponse.json({ error: 'Invalid Anthropic API key format' }, { status: 400 })
     }
 
-    const modelName =
-      provider === 'anthropic' ? 'anthropic/claude-sonnet-4-5-20250929' : 'openai/gpt-5'
+    const modelName = provider === 'anthropic' ? 'anthropic/claude-sonnet-4-6' : 'openai/gpt-5'
+
+    let sessionId: string | null = null
+    let liveViewUrl: string | null = null
 
     try {
       logger.info('Initializing Stagehand with Browserbase (v3)', { provider, modelName })
@@ -190,6 +194,35 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
       await stagehand.init()
       logger.info('Stagehand initialized successfully')
 
+      sessionId = stagehand.browserbaseSessionID ?? null
+      if (sessionId) {
+        try {
+          const debugResponse = await fetch(
+            `https://api.browserbase.com/v1/sessions/${sessionId}/debug`,
+            {
+              method: 'GET',
+              headers: {
+                'X-BB-API-Key': BROWSERBASE_API_KEY,
+              },
+            }
+          )
+          if (debugResponse.ok) {
+            const debugData = (await debugResponse.json()) as {
+              debuggerFullscreenUrl?: string
+              debuggerUrl?: string
+            }
+            liveViewUrl = debugData.debuggerFullscreenUrl ?? debugData.debuggerUrl ?? null
+            if (liveViewUrl) {
+              logger.info(`Browserbase live view URL: ${liveViewUrl}`)
+            }
+          } else {
+            logger.warn(`Failed to fetch Browserbase debug URL: ${debugResponse.statusText}`)
+          }
+        } catch (debugError) {
+          logger.warn('Error fetching Browserbase debug URL', { error: debugError })
+        }
+      }
+
       const page = stagehand.context.pages()[0]
       logger.info(`Navigating to ${startUrl}`)
       await page.goto(startUrl, { waitUntil: 'networkidle' })
@@ -223,13 +256,14 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
           apiKey: apiKey,
         },
         systemPrompt: agentInstructions,
+        mode,
       })
 
-      logger.info('Executing agent task', { task: taskWithVariables })
+      logger.info('Executing agent task', { task: taskWithVariables, mode, maxSteps })
 
       const agentExecutionResult = await agent.execute({
         instruction: taskWithVariables,
-        maxSteps: 20,
+        maxSteps,
       })
 
       const agentResult = {
@@ -293,6 +327,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
       return NextResponse.json({
         agentResult,
         structuredOutput,
+        liveViewUrl,
+        sessionId,
       })
     } catch (error) {
       logger.error('Stagehand agent execution error', {
@@ -327,6 +363,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
         {
           error: errorMessage,
           details: errorDetails,
+          liveViewUrl,
+          sessionId,
         },
         { status: 500 }
       )
 
@@ -17,8 +17,6 @@ const BROWSERBASE_PROJECT_ID = env.BROWSERBASE_PROJECT_ID
 const requestSchema = z.object({
   instruction: z.string(),
   schema: z.record(z.any()),
-  useTextExtract: z.boolean().optional().default(false),
-  selector: z.string().nullable().optional(),
   provider: z.enum(['openai', 'anthropic']).optional().default('openai'),
   apiKey: z.string(),
   url: z.string().url(),
@@ -51,7 +49,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
     }
 
     const params = validationResult.data
-    const { url: rawUrl, instruction, selector, provider, apiKey, schema } = params
+    const { url: rawUrl, instruction, provider, apiKey, schema } = params
     const url = normalizeUrl(rawUrl)
     const urlValidation = await validateUrlWithDNS(url, 'url')
     if (!urlValidation.isValid) {
@@ -101,8 +99,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
     }
 
     try {
-      const modelName =
-        provider === 'anthropic' ? 'anthropic/claude-sonnet-4-5-20250929' : 'openai/gpt-5'
+      const modelName = provider === 'anthropic' ? 'anthropic/claude-sonnet-4-6' : 'openai/gpt-5'
 
       logger.info('Initializing Stagehand with Browserbase (v3)', { provider, modelName })
 
@@ -162,14 +159,11 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
         logger.info('Calling stagehand.extract with options', {
           hasInstruction: !!instruction,
           hasSchema: !!zodSchema,
-          hasSelector: !!selector,
         })
 
         let extractedData
         if (zodSchema) {
-          extractedData = await stagehand.extract(instruction, zodSchema, {
-            selector: selector || undefined,
-          })
+          extractedData = await stagehand.extract(instruction, zodSchema)
         } else {
           extractedData = await stagehand.extract(instruction)
         }
 
@@ -23,6 +23,12 @@ export const BrowserUseBlock: BlockConfig<BrowserUseResponse> = {
       placeholder: 'Describe what the browser agent should do...',
       required: true,
     },
+    {
+      id: 'startUrl',
+      title: 'Start URL',
+      type: 'short-input',
+      placeholder: 'https://example.com (optional starting URL)',
+    },
     {
       id: 'variables',
       title: 'Variables (Secrets)',
@@ -51,22 +57,85 @@ export const BrowserUseBlock: BlockConfig<BrowserUseResponse> = {
         { label: 'Claude 3.7 Sonnet', id: 'claude-3-7-sonnet-20250219' },
         { label: 'Claude Sonnet 4', id: 'claude-sonnet-4-20250514' },
         { label: 'Claude Sonnet 4.5', id: 'claude-sonnet-4-5-20250929' },
+        { label: 'Claude Sonnet 4.6', id: 'claude-sonnet-4-6' },
         { label: 'Claude Opus 4.5', id: 'claude-opus-4-5-20251101' },
         { label: 'Llama 4 Maverick', id: 'llama-4-maverick-17b-128e-instruct' },
       ],
     },
-    {
-      id: 'save_browser_data',
-      title: 'Save Browser Data',
-      type: 'switch',
-      placeholder: 'Save browser data',
-    },
     {
       id: 'profile_id',
       title: 'Profile ID',
       type: 'short-input',
       placeholder: 'Enter browser profile ID (optional)',
     },
+    {
+      id: 'maxSteps',
+      title: 'Max Steps',
+      type: 'short-input',
+      placeholder: '100',
+      mode: 'advanced',
+    },
+    {
+      id: 'allowedDomains',
+      title: 'Allowed Domains',
+      type: 'short-input',
+      placeholder: 'example.com, docs.example.com',
+      mode: 'advanced',
+    },
+    {
+      id: 'vision',
+      title: 'Vision',
+      type: 'dropdown',
+      options: [
+        { label: 'Auto (default)', id: 'auto' },
+        { label: 'Enabled', id: 'true' },
+        { label: 'Disabled', id: 'false' },
+      ],
+      mode: 'advanced',
+    },
+    {
+      id: 'flashMode',
+      title: 'Flash Mode',
+      type: 'switch',
+      placeholder: 'Faster but less careful navigation',
+      mode: 'advanced',
+    },
+    {
+      id: 'thinking',
+      title: 'Thinking',
+      type: 'switch',
+      placeholder: 'Enable extended reasoning',
+      mode: 'advanced',
+    },
+    {
+      id: 'highlightElements',
+      title: 'Highlight Elements',
+      type: 'switch',
+      placeholder: 'Visually mark interactive elements',
+      mode: 'advanced',
+    },
+    {
+      id: 'systemPromptExtension',
+      title: 'System Prompt Extension',
+      type: 'long-input',
+      placeholder: 'Append custom instructions to the agent system prompt (max 2000 chars)',
+      mode: 'advanced',
+    },
+    {
+      id: 'structuredOutput',
+      title: 'Structured Output Schema',
+      type: 'code',
+      language: 'json',
+      placeholder: 'Stringified JSON schema for structured output',
+      mode: 'advanced',
+    },
+    {
+      id: 'metadata',
+      title: 'Metadata',
+      type: 'table',
+      columns: ['Key', 'Value'],
+      mode: 'advanced',
+    },
     {
       id: 'apiKey',
       title: 'API Key',
@@ -78,19 +147,68 @@ export const BrowserUseBlock: BlockConfig<BrowserUseResponse> = {
   ],
   tools: {
     access: ['browser_use_run_task'],
+    config: {
+      tool: () => 'browser_use_run_task',
+      params: (params) => {
+        const next: Record<string, any> = { ...params }
+        if (typeof next.maxSteps === 'string') {
+          const trimmed = next.maxSteps.trim()
+          if (trimmed === '') {
+            next.maxSteps = undefined
+          } else {
+            const n = Number(trimmed)
+            next.maxSteps = Number.isFinite(n) ? n : undefined
+          }
+        }
+        if (next.vision === 'true') next.vision = true
+        else if (next.vision === 'false') next.vision = false
+        if (next.metadata && Array.isArray(next.metadata)) {
+          const obj: Record<string, string> = {}
+          for (const row of next.metadata as Array<Record<string, any>>) {
+            const key = row?.cells?.Key ?? row?.Key
+            const value = row?.cells?.Value ?? row?.Value
+            if (key) obj[key] = String(value ?? '')
+          }
+          next.metadata = obj
+        }
+        return next
+      },
+    },
   },
   inputs: {
     task: { type: 'string', description: 'Browser automation task' },
+    startUrl: { type: 'string', description: 'Starting URL for the agent' },
     apiKey: { type: 'string', description: 'BrowserUse API key' },
-    variables: { type: 'json', description: 'Task variables' },
-    model: { type: 'string', description: 'AI model to use' },
-    save_browser_data: { type: 'boolean', description: 'Save browser data' },
+    variables: { type: 'json', description: 'Secrets to inject into the task' },
+    model: { type: 'string', description: 'LLM model to use' },
     profile_id: { type: 'string', description: 'Browser profile ID for persistent sessions' },
+    maxSteps: { type: 'number', description: 'Maximum agent steps' },
+    allowedDomains: { type: 'string', description: 'Comma-separated allowed domains' },
+    vision: { type: 'string', description: 'Vision capability (auto / true / false)' },
+    flashMode: { type: 'boolean', description: 'Enable flash mode' },
+    thinking: { type: 'boolean', description: 'Enable extended reasoning' },
+    highlightElements: { type: 'boolean', description: 'Highlight interactive elements' },
+    systemPromptExtension: { type: 'string', description: 'Custom system prompt extension' },
+    structuredOutput: { type: 'string', description: 'Stringified JSON schema' },
+    metadata: { type: 'json', description: 'Custom key-value metadata' },
   },
   outputs: {
     id: { type: 'string', description: 'Task execution identifier' },
     success: { type: 'boolean', description: 'Task completion status' },
-    output: { type: 'json', description: 'Task output data' },
-    steps: { type: 'json', description: 'Execution steps taken' },
+    output: { type: 'json', description: 'Final task output (string or structured)' },
+    steps: {
+      type: 'json',
+      description:
+        'Steps the agent executed (number, memory, evaluationPreviousGoal, nextGoal, url, screenshotUrl, actions, duration)',
+    },
+    liveUrl: {
+      type: 'string',
+      description: 'Embeddable live browser session URL (active during execution)',
+    },
+    shareUrl: {
+      type: 'string',
+      description: 'Public shareable URL for the session (post-run)',
+    },
+    sessionId: { type: 'string', description: 'Browser Use session identifier' },
   },
 }