há 1 ano atrás · ad600faf4b
--- a/src/gptbot/classes/openai.py
+++ b/src/gptbot/classes/openai.py
@@ -4,6 +4,7 @@ import tiktoken
 
				 
			
 
				 import asyncio
			
 
				 import json
			
 
				+import base64
			
 
				 
			
 
				 from functools import partial
			
 
				 from contextlib import closing
			
@@ -387,7 +388,7 @@ Only the event_types mentioned above are allowed, you must not respond in any ot
 
				         Yields:
			
 
				             bytes: The audio data.
			
 
				         """
			
 
				-        self.logger.log(f"Generating speech from text '{text}'...")
			
 
				+        self.logger.log(f"Generating speech from text of length: {len(text.split())} words...")
			
 
				 
			
 
				         speech = await self.openai_api.audio.speech.create(
			
 
				             model=self.tts_model,
			
@@ -475,3 +476,37 @@ Only the event_types mentioned above are allowed, you must not respond in any ot
 
				             images.append(image)
			
 
				 
			
 
				         return images, len(images)
			
 
				+
			
 
				+    async def describe_images(self, messages: list, user: Optional[str] = None) -> Tuple[str, int]:
			
 
				+        """Generate descriptions for the images in a conversation.
			
 
				+
			
 
				+        The first entry of ``messages`` is replaced by a system prompt that
			
 
				+        instructs the model to describe every image, and the resulting
			
 
				+        conversation is sent to the chat completions endpoint.
			
 
				+
			
 
				+        Args:
			
 
				+            messages (list): The conversation as chat-completion message dicts.
			
 
				+                The first entry is discarded (presumably the caller's own
			
 
				+                system message - confirm against callers).
			
 
				+            user (Optional[str], optional): User identifier passed through to
			
 
				+                the API request. Defaults to None.
			
 
				+
			
 
				+        Returns:
			
 
				+            Tuple[str, int]: The description and the number of tokens used.
			
 
				+        """
			
 
				+        self.logger.log("Generating description for images in conversation...")
			
 
				+
			
 
				+        system_message = "You are an image description generator. You generate descriptions for all images in the current conversation, one after another."
			
 
				+
			
 
				+        messages = [
			
 
				+            {
			
 
				+                "role": "system",
			
 
				+                "content": system_message,
			
 
				+            }
			
 
				+        ] + messages[1:]
			
 
				+
			
 
				+        # Image input needs a vision-capable model: keep the configured chat
			
 
				+        # model when its name marks it as one, otherwise use the vision
			
 
				+        # model. The chosen name must be the one actually sent below.
			
 
				+        chat_model = self.chat_model
			
 
				+        if "vision" not in chat_model:
			
 
				+            chat_model = "gpt-4-vision-preview"
			
 
				+
			
 
				+        chat_partial = partial(
			
 
				+            self.openai_api.chat.completions.create,
			
 
				+            model=chat_model,
			
 
				+            messages=messages,
			
 
				+            user=user,
			
 
				+        )
			
 
				+
			
 
				+        response = await self._request_with_retries(chat_partial)
			
 
				+
			
 
				+        return response.choices[0].message.content, response.usage.total_tokens
			
--- a/src/gptbot/tools/base.py
+++ b/src/gptbot/tools/base.py
@@ -4,9 +4,10 @@ class BaseTool:
 
				 
			
 
				     def __init__(self, **kwargs):
			
 
				+        """Store the keyword arguments and expose the common ones as attributes.
			
 
				+
			
 
				+        Args:
			
 
				+            **kwargs: Arbitrary keyword arguments, kept as ``self.kwargs``.
			
 
				+                "bot", "room" and "user" are copied to attributes and are
			
 
				+                None when absent; "messages" falls back to an empty list.
			
 
				+        """
			
 
				         self.kwargs = kwargs
			
 
				-        self.bot = kwargs["bot"]
			
 
				-        self.room = kwargs["room"]
			
 
				-        self.user = kwargs["user"]
			
 
				+        self.bot = kwargs.get("bot")
			
 
				+        self.room = kwargs.get("room")
			
 
				+        self.user = kwargs.get("user")
			
 
				+        self.messages = kwargs.get("messages", [])
			
 
				 
			
 
				     async def run(self):
			
 
				+        """Run the tool; the base implementation always raises NotImplementedError."""
			
 
				         raise NotImplementedError()
			
--- a/src/gptbot/tools/imagedescription.py
+++ b/src/gptbot/tools/imagedescription.py
@@ -1,24 +1,15 @@
 
				 from .base import BaseTool, Handover
			
 
				 
			
 
				 class Imagedescription(BaseTool):
			
 
				-    DESCRIPTION = "Describe the content of an image."
			
 
				+    DESCRIPTION = "Describe the content of the images in the conversation."
			
 
				     PARAMETERS = {
			
 
				         "type": "object",
			
 
				         "properties": {
			
 
				-            "image": {
			
 
				-                "type": "string",
			
 
				-                "description": "The image to describe.",
			
 
				-            },
			
 
				         },
			
 
				-        "required": ["image"],
			
 
				     }
			
 
				 
			
 
				     async def run(self):
			
 
				-        """Describe an image.
			
 
				-        
			
 
				-        This tool only hands over to the original model, if applicable.
			
 
				-        It is intended to handle the case where GPT-3 thinks it is asked to
			
 
				-        *generate* an image, but the user actually wants to *describe* an
			
 
				-        image...
			
 
				-        """
			
 
				-        raise Handover()
			
 
				+        """Describe the images in the conversation.
			
 
				+
			
 
				+        Passes the stored conversation messages and user to the bot's image
			
 
				+        API instead of handing over to the original model.
			
 
				+
			
 
				+        Returns:
			
 
				+            The first element of the ``describe_images`` result (presumably
			
 
				+            the description text - confirm against the image API in use).
			
 
				+        """
			
 
				+        image_api = self.bot.image_api
			
 
				+
			
 
				+        # describe_images returns a sequence; only its first element is returned.
			
 
				+        return (await image_api.describe_images(self.messages, self.user))[0]