improve chain click logic (#4166)

2025-12-03 02:34:08 +08:00
parent 38b3568917
commit a02ccee769
3 changed files with 13 additions and 72 deletions
--- a/skyvern/library/embedded_server_factory.py
+++ b/skyvern/library/embedded_server_factory.py
@@ -1,15 +1,9 @@
 from typing import Any
 import httpx
 from httpx import ASGITransport
 from skyvern.forge.sdk.api.llm.config_registry import LLMConfigRegistry
 from skyvern.forge.sdk.api.llm.models import LLMConfig, LLMRouterConfig
 def create_embedded_server(
-    llm_config: LLMRouterConfig | LLMConfig | None = None,
+    openai_api_key: str | None,
    settings_overrides: dict[str, Any] | None = None,
 ) -> httpx.AsyncClient:
    class EmbeddedServerTransport(httpx.AsyncBaseTransport):
        def __init__(self) -> None:
@@ -21,20 +15,8 @@ def create_embedded_server(
                settings.BROWSER_LOGS_ENABLED = False
-                if llm_config:
+                if openai_api_key:
-                    LLMConfigRegistry.register_config(
+                    settings.OPENAI_API_KEY = openai_api_key
                        "CUSTOM_LLM",
                        llm_config,
                    )
                    settings.LLM_KEY = "CUSTOM_LLM"
                # Apply custom settings overrides
                if settings_overrides:
                    for key, value in settings_overrides.items():
                        if hasattr(settings, key):
                            setattr(settings, key, value)
                        else:
                            raise ValueError(f"Invalid setting: {key}")
                from skyvern.forge.api_app import create_api_app  # noqa: PLC0415
--- a/skyvern/library/skyvern.py
+++ b/skyvern/library/skyvern.py
@@ -9,7 +9,6 @@ from playwright.async_api import Playwright, async_playwright
 from skyvern.client import AsyncSkyvern, BrowserSessionResponse, SkyvernEnvironment
 from skyvern.client.types.task_run_response import TaskRunResponse
 from skyvern.client.types.workflow_run_response import WorkflowRunResponse
 from skyvern.forge.sdk.api.llm.models import LLMConfig, LLMRouterConfig
 from skyvern.library.constants import DEFAULT_AGENT_HEARTBEAT_INTERVAL, DEFAULT_AGENT_TIMEOUT, DEFAULT_CDP_PORT
 from skyvern.library.embedded_server_factory import create_embedded_server
 from skyvern.library.skyvern_browser import SkyvernBrowser
@@ -107,55 +106,15 @@ class Skyvern(AsyncSkyvern):
    def __init__(
        self,
        *,
-        llm_config: LLMRouterConfig | LLMConfig | None = None,
+        openai_api_key: str | None = None,
        settings: dict[str, Any] | None = None,
    ) -> None:
        """Embedded mode: Run Skyvern locally in-process.
-        Prerequisites:
+        To use this mode, run `skyvern quickstart` first.
            Run `skyvern quickstart` first to set up your local environment and create a .env file.
        Args:
-            llm_config: Optional custom LLM configuration (LLMConfig or LLMRouterConfig).
+            openai_api_key: Optional OpenAI API key override for LLM operations.
-                If provided, this will be registered as "CUSTOM_LLM" and used as the primary LLM,
+                If not provided, the one from the .env file will be used.
                overriding the LLM_KEY setting from your .env file.
                If not provided, uses the LLM configured via LLM_KEY in your .env file.
                Example 1 - Using environment variables (recommended):
                    ```python
                    from skyvern import Skyvern
                    from skyvern.forge.sdk.api.llm.models import LLMConfig
                    # Assumes OPENAI_API_KEY is set in your environment
                    llm_config = LLMConfig(
                        model_name="gpt-4o",
                        required_env_vars=["OPENAI_API_KEY"],
                        supports_vision=True,
                        add_assistant_prefix=False,
                    )
                    skyvern = Skyvern(llm_config=llm_config)
                    ```
                Example 2 - Explicitly providing credentials:
                    ```python
                    from skyvern import Skyvern
                    from skyvern.forge.sdk.api.llm.models import LLMConfig, LiteLLMParams
                    llm_config = LLMConfig(
                        model_name="gpt-4o",
                        required_env_vars=[],  # No env vars required
                        supports_vision=True,
                        add_assistant_prefix=False,
                        litellm_params=LiteLLMParams(
                            api_base="https://api.openai.com/v1",
                            api_key="sk-...",  # Your API key
                        ),
                    )
                    skyvern = Skyvern(llm_config=llm_config)
                    ```
            settings: Optional dictionary of Skyvern settings to override.
                These override the corresponding settings from your .env file.
                Example: {"MAX_STEPS_PER_RUN": 100, "BROWSER_TYPE": "chromium-headful"}
        """
        ...
@@ -163,13 +122,12 @@ class Skyvern(AsyncSkyvern):
        self,
        *,
        environment: SkyvernEnvironment | None = None,
        openai_api_key: str | None = None,
        base_url: str | None = None,
        api_key: str | None = None,
        timeout: float | None = None,
        follow_redirects: bool | None = True,
        httpx_client: httpx.AsyncClient | None = None,
        llm_config: LLMRouterConfig | LLMConfig | None = None,
        settings: dict[str, Any] | None = None,
    ):
        if environment is None:
            if httpx_client is not None:
@@ -189,8 +147,7 @@ class Skyvern(AsyncSkyvern):
                timeout=timeout,
                follow_redirects=follow_redirects,
                httpx_client=create_embedded_server(
-                    llm_config=llm_config,
+                    openai_api_key=openai_api_key,
                    settings_overrides=settings,
                ),
            )
        else:
--- a/skyvern/webeye/actions/handler.py
+++ b/skyvern/webeye/actions/handler.py
@@ -2338,7 +2338,8 @@ async def chain_click(
                    locator=locator,
                )
                if bound_locator := await skyvern_element.find_bound_label_by_attr_id():
-                    await bound_locator.click(timeout=timeout)
+                    # click the label's top-left corner (0, 0) so playwright doesn't hit the wrong element by accident
                    await bound_locator.click(timeout=timeout, position={"x": 0, "y": 0})
                    action_results.append(ActionSuccess())
                    return action_results
            except Exception as e:
@@ -2354,7 +2355,8 @@ async def chain_click(
                    locator=locator,
                )
                if bound_locator := await skyvern_element.find_bound_label_by_direct_parent():
-                    await bound_locator.click(timeout=timeout)
+                    # click the label's top-left corner (0, 0) so playwright doesn't hit the wrong element by accident
                    await bound_locator.click(timeout=timeout, position={"x": 0, "y": 0})
                    action_results.append(ActionSuccess())
                    return action_results
            except Exception as e: