1 year ago · ad600faf4b
--- a/src/gptbot/classes/openai.py
+++ b/src/gptbot/classes/openai.py
@@ -4,6 +4,7 @@ import tiktoken
 
															 import asyncio
														
 
															 import json
														
 
															+import base64
														
 
															 from functools import partial
														
 
															 from contextlib import closing
														
@@ -387,7 +388,7 @@ Only the event_types mentioned above are allowed, you must not respond in any ot
 
															         Yields:
														
 
															             bytes: The audio data.
														
 
															         """
														
 
															-        self.logger.log(f"Generating speech from text '{text}'...")
														
 
															+        self.logger.log(f"Generating speech from text of length: {len(text.split())} words...")
														
 
															         speech = await self.openai_api.audio.speech.create(
														
 
															             model=self.tts_model,
														
@@ -475,3 +476,37 @@ Only the event_types mentioned above are allowed, you must not respond in any ot
 
															             images.append(image)
														
 
															         return images, len(images)
														
 
															+
														
 
															+    async def describe_images(self, messages: list, user: Optional[str] = None) -> Tuple[str, int]:
														
 
															+        """Generate a description for an image.
														
 
															+
														
 
															+        Args:
														
 
															+            image (bytes): The image data.
														
 
															+
														
 
															+        Returns:
														
 
															+            Tuple[str, int]: The description and the number of tokens used.
														
 
															+        """
														
 
															+        self.logger.log(f"Generating description for images in conversation...")
														
 
															+
														
 
															+        system_message = "You are an image description generator. You generate descriptions for all images in the current conversation, one after another."
														
 
															+
														
 
															+        messages = [
														
 
															+            {
														
 
															+                "role": "system",
														
 
															+                "content": system_message
														
 
															+            }
														
 
															+        ] + messages[1:]
														
 
															+
														
 
															+        if not "vision" in (chat_model := self.chat_model):
														
 
															+            chat_model = self.chat_model + "gpt-4-vision-preview"
														
 
															+
														
 
															+        chat_partial = partial(
														
 
															+            self.openai_api.chat.completions.create,
														
 
															+                model=self.chat_model,
														
 
															+                messages=messages,
														
 
															+                user=user,
														
 
															+        )
														
 
															+
														
 
															+        response = await self._request_with_retries(chat_partial)
														
 
															+
														
 
															+        return response.choices[0].message.content, response.usage.total_tokens
														
--- a/src/gptbot/tools/base.py
+++ b/src/gptbot/tools/base.py
@@ -4,9 +4,10 @@ class BaseTool:
 
															     def __init__(self, **kwargs):
														
 
															         self.kwargs = kwargs
														
 
															-        self.bot = kwargs["bot"]
														
 
															-        self.room = kwargs["room"]
														
 
															-        self.user = kwargs["user"]
														
 
															+        self.bot = kwargs.get("bot")
														
 
															+        self.room = kwargs.get("room")
														
 
															+        self.user = kwargs.get("user")
														
 
															+        self.messages = kwargs.get("messages", [])
														
 
															     async def run(self):
														
 
															         raise NotImplementedError()
														
--- a/src/gptbot/tools/imagedescription.py
+++ b/src/gptbot/tools/imagedescription.py
@@ -1,24 +1,15 @@
 
															 from .base import BaseTool, Handover
														
 
															 class Imagedescription(BaseTool):
														
 
															-    DESCRIPTION = "Describe the content of an image."
														
 
															+    DESCRIPTION = "Describe the content of the images in the conversation."
														
 
															     PARAMETERS = {
														
 
															         "type": "object",
														
 
															         "properties": {
														
 
															-            "image": {
														
 
															-                "type": "string",
														
 
															-                "description": "The image to describe.",
														
 
															-            },
														
 
															         },
														
 
															-        "required": ["image"],
														
 
															     }
														
 
															     async def run(self):
														
 
															-        """Describe an image.
														
 
															-        
														
 
															-        This tool only hands over to the original model, if applicable.
														
 
															-        It is intended to handle the case where GPT-3 thinks it is asked to
														
 
															-        *generate* an image, but the user actually wants to *describe* an
														
 
															-        image...
														
 
															-        """
														
 
															-        raise Handover()
														
 
															+        """Describe images in the conversation."""
														
 
															+        image_api = self.bot.image_api
														
 
															+
														
 
															+        return (await image_api.describe_images(self.messages, self.user))[0]