diff --git a/skyvern/forge/prompts/skyvern/suggest-data-schema.j2 b/skyvern/forge/prompts/skyvern/suggest-data-schema.j2 index b8ea050b..edc0938b 100644 --- a/skyvern/forge/prompts/skyvern/suggest-data-schema.j2 +++ b/skyvern/forge/prompts/skyvern/suggest-data-schema.j2 @@ -1,35 +1,58 @@ -You are given an input string from a user. This string is a data extraction goal for an AI agent. It tells the agent what to do on a web page. +We are developing an interface for AI agent tasks that use JSON schemas to describe the shape of the data that needs to be extracted from a web page. -A data extraction goal describes what data to extract from the page. +You are given an input prompt from a user, and some additional context. Your goal is to generate a JSON schema given the user prompt and the context. -Your goal when given an input data extraction goal is to provide a JSONC schema describing a shape for the data to be extracted. +If additional context is given, try to use it for further clues about the data that needs to be extracted. For example, the user might provide some detail about +product information to be extracted in a "data_extraction_goal" inside the context, but maybe not necessarily pass it in the input prompt. In these cases, you should use the +context. -Good data schema examples: +Here is an example: -Input data extraction goal: "Extract the title and link of the top post on Hacker News." -Suggested Data Schema: +User prompt: Generate a data schema that extracts the title and link for the posts as a list +Additional context: ```json { - "type": "object", - "properties": { - "title": { - "type": "string", - "description": "The title of the top post on Hacker News." - }, - "link": { - "type": "string", - "format": "uri", - "description": "The URL link to the top post on Hacker News." 
- } - }, - "required": [ - "title", - "link" - ] + "url": "https://news.ycombinator.com", + "data_extraction_goal": "Extract the title and link of the top 5 posts", + "existing_schema": "null" } ``` -Respond only with JSON output containing a single key "output" with the value of the suggested data schema given the following input data extraction goal: +Suggested Data Schema: +```json +{ + "posts" : { + "type": "array", + "items": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Title of the post" + }, + "link": { + "type": "string", + "description": "Link to the post" + } + }, + "required": [ + "title", + "link" + ] + } + } +} ``` + +{% if additional_context %} + +You are provided some additional context about the suggestion here: + +{{additional_context}} + +{% endif %} + +Respond only with JSON output containing a single key "output" with the value of the suggested data schema given the following input: + {{ input }} -``` + diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index 8ea82380..35299527 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -956,7 +956,7 @@ async def make_ai_suggestion( llm_prompt = "" if ai_suggestion_type == AISuggestionType.DATA_SCHEMA: - llm_prompt = prompt_engine.load_prompt("suggest-data-schema", input=data.input) + llm_prompt = prompt_engine.load_prompt("suggest-data-schema", input=data.input, additional_context=data.context) try: new_ai_suggestion = await app.DATABASE.create_ai_suggestion( diff --git a/skyvern/forge/sdk/schemas/ai_suggestions.py b/skyvern/forge/sdk/schemas/ai_suggestions.py index 00608a94..8e8d51e4 100644 --- a/skyvern/forge/sdk/schemas/ai_suggestions.py +++ b/skyvern/forge/sdk/schemas/ai_suggestions.py @@ -20,3 +20,4 @@ class AISuggestion(AISuggestionBase): class AISuggestionRequest(BaseModel): input: str = Field(..., min_length=1) + context: dict[str, Any] | None = 
None