Move the code over from private repository (#3)
This commit is contained in:
0
skyvern/forge/sdk/api/__init__.py
Normal file
0
skyvern/forge/sdk/api/__init__.py
Normal file
134
skyvern/forge/sdk/api/aws.py
Normal file
134
skyvern/forge/sdk/api/aws.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import functools
from enum import StrEnum
from typing import Any, Callable
from urllib.parse import urlparse

import aioboto3
import structlog
from aiobotocore.client import AioBaseClient

from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
class AWSClientType(StrEnum):
|
||||
S3 = "s3"
|
||||
SECRETS_MANAGER = "secretsmanager"
|
||||
|
||||
|
||||
def execute_with_async_client(client_type: AWSClientType) -> Callable:
|
||||
def decorator(f: Callable) -> Callable:
|
||||
async def wrapper(*args: list[Any], **kwargs: dict[str, Any]) -> Any:
|
||||
self = args[0]
|
||||
assert isinstance(self, AsyncAWSClient)
|
||||
session = aioboto3.Session()
|
||||
async with session.client(client_type) as client:
|
||||
return await f(*args, client=client, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
class AsyncAWSClient:
    """Async helpers for the AWS services Skyvern touches (Secrets Manager, S3).

    Every method receives a fresh service client injected by the
    ``execute_with_async_client`` decorator. Failures are logged and surfaced
    to callers as ``None`` rather than raised.
    """

    @execute_with_async_client(client_type=AWSClientType.SECRETS_MANAGER)
    async def get_secret(self, secret_name: str, client: AioBaseClient = None) -> str | None:
        """Return the secret's string value, or None if retrieval fails."""
        try:
            secret_response = await client.get_secret_value(SecretId=secret_name)
            return secret_response["SecretString"]
        except Exception as e:
            # Best-effort extraction of the AWS error code for the log line.
            error_code = "failed-to-get-error-code"
            try:
                error_code = e.response["Error"]["Code"]  # type: ignore
            except Exception:
                pass
            LOG.exception("Failed to get secret.", secret_name=secret_name, error_code=error_code, exc_info=True)
            return None

    @execute_with_async_client(client_type=AWSClientType.S3)
    async def upload_file(self, uri: str, data: bytes, client: AioBaseClient = None) -> str | None:
        """Upload raw bytes to an s3:// uri; returns the uri, or None on failure."""
        try:
            target = S3Uri(uri)
            await client.put_object(Body=data, Bucket=target.bucket, Key=target.key)
            LOG.debug("Upload file success", uri=uri)
            return uri
        except Exception:
            LOG.exception("S3 upload failed.", uri=uri)
            return None

    @execute_with_async_client(client_type=AWSClientType.S3)
    async def upload_file_from_path(self, uri: str, file_path: str, client: AioBaseClient = None) -> None:
        """Upload a local file to an s3:// uri; failures are logged, not raised."""
        try:
            target = S3Uri(uri)
            await client.upload_file(file_path, target.bucket, target.key)
            LOG.info("Upload file from path success", uri=uri)
        except Exception:
            LOG.exception("S3 upload failed.", uri=uri)

    @execute_with_async_client(client_type=AWSClientType.S3)
    async def download_file(self, uri: str, client: AioBaseClient = None) -> bytes | None:
        """Fetch the full object body from an s3:// uri, or None on failure."""
        try:
            source = S3Uri(uri)
            fetched = await client.get_object(Bucket=source.bucket, Key=source.key)
            return await fetched["Body"].read()
        except Exception:
            LOG.exception("S3 download failed", uri=uri)
            return None

    @execute_with_async_client(client_type=AWSClientType.S3)
    async def create_presigned_url(self, uri: str, client: AioBaseClient = None) -> str | None:
        """Generate a time-limited GET URL for the object, or None on failure."""
        try:
            source = S3Uri(uri)
            return await client.generate_presigned_url(
                "get_object",
                Params={"Bucket": source.bucket, "Key": source.key},
                ExpiresIn=SettingsManager.get_settings().PRESIGNED_URL_EXPIRATION,
            )
        except Exception:
            LOG.exception("Failed to create presigned url.", uri=uri)
            return None
|
||||
|
||||
|
||||
class S3Uri:
    # From: https://stackoverflow.com/questions/42641315/s3-urls-get-bucket-name-and-path
    """Split an ``s3://bucket/key`` URI into bucket and key components.

    Parsing uses ``allow_fragments=False`` so a ``#`` in the object key is not
    treated as a URL fragment; the ``key`` property re-attaches any query
    string, letting ``?`` and ``#`` appear literally in keys.

    >>> s = S3Uri("s3://bucket/hello/world")
    >>> s.bucket
    'bucket'
    >>> s.key
    'hello/world'
    >>> s.uri
    's3://bucket/hello/world'

    >>> s = S3Uri("s3://bucket/hello/world?qwe1=3#ddd")
    >>> s.bucket
    'bucket'
    >>> s.key
    'hello/world?qwe1=3#ddd'
    >>> s.uri
    's3://bucket/hello/world?qwe1=3#ddd'

    >>> s = S3Uri("s3://bucket/hello/world#foo?bar=2")
    >>> s.key
    'hello/world#foo?bar=2'
    >>> s.uri
    's3://bucket/hello/world#foo?bar=2'
    """

    def __init__(self, uri: str) -> None:
        # allow_fragments=False keeps any "#..." suffix attached to the key.
        self._parsed = urlparse(uri, allow_fragments=False)

    @property
    def bucket(self) -> str:
        """The bucket name (the URI's netloc)."""
        return self._parsed.netloc

    @property
    def key(self) -> str:
        """The object key: path without the leading slash, query re-attached."""
        if self._parsed.query:
            return self._parsed.path.lstrip("/") + "?" + self._parsed.query
        return self._parsed.path.lstrip("/")

    @property
    def uri(self) -> str:
        """The original URI, reassembled from its parsed parts."""
        return self._parsed.geturl()
|
||||
25
skyvern/forge/sdk/api/chat_completion_price.py
Normal file
25
skyvern/forge/sdk/api/chat_completion_price.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from typing import Callable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Per-model pricing as (input_token_price, output_token_price) pairs, each the
# price per 1,000 tokens (presumably USD — verify against OpenAI's price list).
# ChatCompletionPrice looks a model up here and divides by 1000 per token.
# NOTE(review): despite the name, the values are tuples, not lambdas; the
# lambda is built later in ChatCompletionPrice.__init__.
openai_model_to_price_lambdas = {
    "gpt-4-vision-preview": (0.01, 0.03),
    "gpt-4-1106-preview": (0.01, 0.03),
    "gpt-3.5-turbo": (0.001, 0.002),
    "gpt-3.5-turbo-1106": (0.001, 0.002),
}
|
||||
|
||||
|
||||
class ChatCompletionPrice(BaseModel):
    """Token usage of one chat completion plus a pricing function.

    The pricing function is specialized for the ``model_name`` given to
    ``__init__``; call ``openai_model_to_price_lambda(input_tokens,
    output_tokens)`` to compute the request's cost (table prices are per
    1,000 tokens).
    """

    # Number of prompt tokens sent to the model.
    input_token_count: int
    # Number of completion tokens returned by the model.
    output_token_count: int
    # (input_tokens, output_tokens) -> price, bound to the model's rates.
    openai_model_to_price_lambda: Callable[[int, int], float]

    def __init__(self, input_token_count: int, output_token_count: int, model_name: str) -> None:
        # Raises KeyError when model_name is not in openai_model_to_price_lambdas.
        input_token_price, output_token_price = openai_model_to_price_lambdas[model_name]
        super().__init__(
            input_token_count=input_token_count,
            output_token_count=output_token_count,
            # Per-1K-token pricing: rate * tokens / 1000 for each direction.
            openai_model_to_price_lambda=lambda input_token, output_token: input_token_price * input_token / 1000
            + output_token_price * output_token / 1000,
        )
|
||||
47
skyvern/forge/sdk/api/files.py
Normal file
47
skyvern/forge/sdk/api/files.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
import structlog
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
def download_file(url: str, timeout: float = 60) -> str | None:
    """Download ``url`` into a fresh temporary directory.

    The response is streamed so large files are never held in memory at once.

    Args:
        url: The URL to fetch.
        timeout: Connect/read timeout in seconds. Without one, requests can
            block forever on an unresponsive server.

    Returns:
        The local file path on success, or None for a non-200 response.
    """
    # Send an HTTP request to the URL of the file, stream=True to prevent loading the content at once into memory.
    # The context manager releases the connection back to the pool when done.
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Check if the request is successful
        if r.status_code != 200:
            LOG.error(f"Failed to download file, status code: {r.status_code}")
            return None

        # Parse the URL to derive a file name; fall back to a fixed name for
        # URLs ending in "/" whose basename would otherwise be empty.
        a = urlparse(url)
        file_name = os.path.basename(a.path) or "downloaded_file"

        # A per-download directory avoids clobbering same-named files.
        temp_dir = tempfile.mkdtemp(prefix="skyvern_downloads_")
        file_path = os.path.join(temp_dir, file_name)

        LOG.info(f"Downloading file to {file_path}")
        with open(file_path, "wb") as f:
            # Write the content of the request into the file, chunk by chunk.
            for chunk in r.iter_content(1024):
                f.write(chunk)
        LOG.info(f"File downloaded successfully to {file_path}")
        return file_path
|
||||
|
||||
|
||||
def zip_files(files_path: str, zip_file_path: str) -> str:
    """Recursively archive everything under ``files_path`` into a deflated zip.

    Entry names are relative to ``files_path``. Returns ``zip_file_path``.
    """
    with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, filenames in os.walk(files_path):
            for filename in filenames:
                absolute_path = os.path.join(current_dir, filename)
                # Store each file under its path relative to the tree root.
                archive.write(absolute_path, os.path.relpath(absolute_path, files_path))

    return zip_file_path
|
||||
221
skyvern/forge/sdk/api/open_ai.py
Normal file
221
skyvern/forge/sdk/api/open_ai.py
Normal file
@@ -0,0 +1,221 @@
|
||||
import base64
|
||||
import json
|
||||
import random
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
import commentjson
|
||||
import openai
|
||||
import structlog
|
||||
from openai import AsyncOpenAI
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
from skyvern.exceptions import InvalidOpenAIResponseFormat, NoAvailableOpenAIClients, OpenAIRequestTooBigError
|
||||
from skyvern.forge import app
|
||||
from skyvern.forge.sdk.api.chat_completion_price import ChatCompletionPrice
|
||||
from skyvern.forge.sdk.artifact.models import ArtifactType
|
||||
from skyvern.forge.sdk.models import Step
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
class OpenAIKeyClientWrapper:
    """Bundles one OpenAI API key with its async client and a best-effort view
    of how many requests the key has left (fed from rate-limit headers)."""

    client: AsyncOpenAI
    key: str
    remaining_requests: int | None

    def __init__(self, key: str, remaining_requests: int | None) -> None:
        self.key = key
        self.remaining_requests = remaining_requests
        self.updated_at = datetime.utcnow()
        self.client = AsyncOpenAI(api_key=self.key)

    def update_remaining_requests(self, remaining_requests: int | None) -> None:
        """Record the latest remaining-request count and when it was observed."""
        self.remaining_requests = remaining_requests
        self.updated_at = datetime.utcnow()

    def is_available(self) -> bool:
        """Heuristic: is this key worth trying for the next request?"""
        # A never-tried key (None) is optimistically considered available,
        # as is any key with known remaining quota.
        if self.remaining_requests is None or self.remaining_requests > 0:
            return True
        # Exhausted keys become eligible again after a minute: most failures
        # are per-minute (TPM) limits, so stale exhaustion data is retried.
        return self.updated_at < (datetime.utcnow() - timedelta(minutes=1))
|
||||
|
||||
|
||||
class OpenAIClientManager:
    """Distributes chat-completion requests over a pool of OpenAI API keys,
    rotating to another key on rate limits and persisting request/response
    artifacts for each step."""

    # TODO Support other models for requests without screenshots, track rate limits for each model and key as well if any
    clients: list[OpenAIKeyClientWrapper]

    def __init__(self, api_keys: list[str] | None = None) -> None:
        """Build one client wrapper per API key.

        The default key list is resolved at call time rather than in the
        signature: a signature default would be evaluated once at import,
        freezing the settings value and sharing one mutable list across
        instances.
        """
        if api_keys is None:
            api_keys = SettingsManager.get_settings().OPENAI_API_KEYS
        self.clients = [OpenAIKeyClientWrapper(key, None) for key in api_keys]

    def get_available_client(self) -> OpenAIKeyClientWrapper | None:
        """Return a random available key wrapper, or None if all are exhausted."""
        available_clients = [client for client in self.clients if client.is_available()]

        if not available_clients:
            return None

        # Randomly select an available client to distribute requests across our accounts
        return random.choice(available_clients)

    async def content_builder(
        self,
        step: Step,
        screenshots: list[bytes] | None = None,
        prompt: str | None = None,
    ) -> list[dict[str, Any]]:
        """Assemble the OpenAI message content (text prompt + screenshots) and
        persist each piece as an artifact on the step."""
        content: list[dict[str, Any]] = []

        if prompt is not None:
            content.append(
                {
                    "type": "text",
                    "text": prompt,
                }
            )
            await app.ARTIFACT_MANAGER.create_artifact(
                step=step,
                artifact_type=ArtifactType.LLM_PROMPT,
                data=prompt.encode("utf-8"),
            )
        if screenshots:
            for screenshot in screenshots:
                encoded_image = base64.b64encode(screenshot).decode("utf-8")
                content.append(
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{encoded_image}",
                        },
                    }
                )
                # create artifact for each image
                await app.ARTIFACT_MANAGER.create_artifact(
                    step=step,
                    artifact_type=ArtifactType.SCREENSHOT_LLM,
                    data=screenshot,
                )

        return content

    async def chat_completion(
        self,
        step: Step,
        model: str = "gpt-4-vision-preview",
        max_tokens: int = 4096,
        temperature: int = 0,
        screenshots: list[bytes] | None = None,
        prompt: str | None = None,
    ) -> dict[str, Any]:
        """Send one chat-completion request and return the parsed JSON reply.

        On a rate limit, the current key is marked unavailable and the call
        recurses with another key.

        Raises:
            NoAvailableOpenAIClients: when every key is exhausted.
            OpenAIRequestTooBigError: when OpenAI rejects the request as too large.
            InvalidOpenAIResponseFormat: when the reply is not parseable JSON.
        """
        LOG.info(
            "Sending LLM request",  # plain string: the original f-string had no placeholders
            task_id=step.task_id,
            step_id=step.step_id,
            num_screenshots=len(screenshots) if screenshots else 0,
        )
        messages = [
            {
                "role": "user",
                "content": await self.content_builder(
                    step=step,
                    screenshots=screenshots,
                    prompt=prompt,
                ),
            }
        ]

        chat_completion_kwargs = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }

        await app.ARTIFACT_MANAGER.create_artifact(
            step=step,
            artifact_type=ArtifactType.LLM_REQUEST,
            data=json.dumps(chat_completion_kwargs).encode("utf-8"),
        )
        available_client = self.get_available_client()
        if available_client is None:
            raise NoAvailableOpenAIClients()
        try:
            response = await available_client.client.chat.completions.with_raw_response.create(**chat_completion_kwargs)
        except openai.RateLimitError as e:
            # A 429 code here is treated as "request too big" — retrying with
            # another key would not help, so surface it to the caller.
            if e.code == 429:
                raise OpenAIRequestTooBigError(e.message)
            # If we get a RateLimitError, we can assume the key is not available anymore
            LOG.warning(
                "OpenAI rate limit exceeded, marking key as unavailable.", error_code=e.code, error_message=e.message
            )
            available_client.update_remaining_requests(remaining_requests=0)
            available_client = self.get_available_client()
            if available_client is None:
                raise NoAvailableOpenAIClients()
            # Retry the whole request with a different key.
            return await self.chat_completion(
                step=step,
                model=model,
                max_tokens=max_tokens,
                temperature=temperature,
                screenshots=screenshots,
                prompt=prompt,
            )
        # TODO: https://platform.openai.com/docs/guides/rate-limits/rate-limits-in-headers
        # use other headers, x-ratelimit-limit-requests, x-ratelimit-limit-tokens, x-ratelimit-remaining-tokens
        # x-ratelimit-reset-requests, x-ratelimit-reset-tokens to write a more accurate algorithm for managing api keys

        # If we get a response, we can assume the key is available and update the remaining requests
        ratelimit_remaining_requests = response.headers.get("x-ratelimit-remaining-requests")

        if ratelimit_remaining_requests:
            available_client.update_remaining_requests(remaining_requests=int(ratelimit_remaining_requests))
        else:
            # Guarded: the old code passed headers positionally (structlog
            # %-formats them into the event string → TypeError) and then
            # crashed on int(None) for a missing header.
            LOG.warning("Invalid x-ratelimit-remaining-requests from OpenAI", headers=response.headers)
        chat_completion = response.parse()

        if chat_completion.usage is not None:
            # TODO (Suchintan): Is this bad design?
            step = await app.DATABASE.update_step(
                step_id=step.step_id,
                task_id=step.task_id,
                organization_id=step.organization_id,
                chat_completion_price=ChatCompletionPrice(
                    input_token_count=chat_completion.usage.prompt_tokens,
                    output_token_count=chat_completion.usage.completion_tokens,
                    model_name=model,
                ),
            )
        await app.ARTIFACT_MANAGER.create_artifact(
            step=step,
            artifact_type=ArtifactType.LLM_RESPONSE,
            data=chat_completion.model_dump_json(indent=2).encode("utf-8"),
        )
        parsed_response = self.parse_response(chat_completion)
        await app.ARTIFACT_MANAGER.create_artifact(
            step=step,
            artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
            data=json.dumps(parsed_response, indent=2).encode("utf-8"),
        )
        return parsed_response

    def parse_response(self, response: ChatCompletion) -> dict[str, str]:
        """Strip markdown code fences and parse the completion content as
        comment-tolerant JSON.

        Raises:
            InvalidOpenAIResponseFormat: on empty, None, or unparseable content.
        """
        try:
            content = response.choices[0].message.content
            # Check before .replace(): content may be None, which previously
            # surfaced as a bare AttributeError instead of this clear message.
            if not content:
                raise Exception("openai response content is empty")
            content = content.replace("```json", "")
            content = content.replace("```", "")
            return commentjson.loads(content)
        except Exception as e:
            raise InvalidOpenAIResponseFormat(str(response)) from e
|
||||
Reference in New Issue
Block a user