diff --git a/skyvern/forge/agent_functions.py b/skyvern/forge/agent_functions.py index 9bca9a2c..5846b794 100644 --- a/skyvern/forge/agent_functions.py +++ b/skyvern/forge/agent_functions.py @@ -14,6 +14,7 @@ from skyvern.forge import app from skyvern.forge.async_operations import AsyncOperation from skyvern.forge.prompts import prompt_engine from skyvern.forge.sdk.api.llm.exceptions import LLMProviderError +from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.models import Step, StepStatus from skyvern.forge.sdk.schemas.organizations import Organization from skyvern.forge.sdk.schemas.tasks import Task, TaskStatus @@ -41,7 +42,7 @@ def _should_css_shape_convert(element: Dict) -> bool: return False tag_name = element.get("tagName") - if tag_name not in ["a", "span", "i"]: + if tag_name not in ["a", "span", "i", "button"]: return False # should be without children @@ -53,7 +54,7 @@ def _should_css_shape_convert(element: Dict) -> bool: return False # if and we try to convert the shape - if tag_name in ["span", "i"]: + if tag_name in ["span", "i", "button"]: return True # if , it should be no text, no href/target attribute @@ -461,12 +462,23 @@ class AgentFunction: :param elements: List of elements to remove xpaths from. :return: List of elements without xpaths. """ + context = skyvern_context.ensure_context() + # page won't be in the context.frame_index_map, so the index is going to be 0 skyvern_frame = await SkyvernFrame.create_instance(frame=frame) + current_frame_index = context.frame_index_map.get(frame, 0) + queue = [] for element in element_tree: queue.append(element) while queue: queue_ele = queue.pop(0) + if queue_ele.get("frame_index") != current_frame_index: + new_frame = next( + (k for k, v in context.frame_index_map.items() if v == queue_ele.get("frame_index")), frame + ) + skyvern_frame = await SkyvernFrame.create_instance(frame=new_frame) + current_frame_index = queue_ele.get("frame_index", 0) + _remove_rect(queue_ele) await _convert_svg_to_string(skyvern_frame, queue_ele, task, step) diff --git a/skyvern/forge/prompts/skyvern/single-click-action.j2 b/skyvern/forge/prompts/skyvern/single-click-action.j2 index 55585377..c886df82 100644 --- a/skyvern/forge/prompts/skyvern/single-click-action.j2 +++ b/skyvern/forge/prompts/skyvern/single-click-action.j2 @@ -15,6 +15,7 @@ Reply in JSON format with the following keys: "action_type": str, // It's a string enum: "CLICK". "CLICK" type means there's an element you'd like to click. "id": str, // The id of the element to take action on. The id has to be one from the elements list. "download": bool, // If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download. + "file_url": str, // The url of the file to upload if applicable. This field can be present for CLICK only if the click is to upload the file. It should be null otherwise. }] }