Przeglądaj źródła

Enhance Wikipedia tool flexibility

Added two options to the gptbot's Wikipedia tool: 'extract' and 'summarize'. The 'extract' option takes a user-defined string describing what information to pull out of the page; the tool still fetches the full page content and then passes it to the bot's existing chat API, which performs the extraction. The 'summarize' option returns a concise version of the article, again using the bot's chat capabilities; if both options are given, the summary is produced from the extracted content. These additions give users more granular control over the information they receive, potentially reducing response clutter and focusing on user-specified interests.
Kumi 11 miesięcy temu
rodzic
commit
31f001057a
1 zmienionych plików z 32 dodań i 15 usunięć
  1. 32 15
      src/gptbot/tools/wikipedia.py

+ 32 - 15
src/gptbot/tools/wikipedia.py

@@ -18,6 +18,15 @@ class Wikipedia(BaseTool):
                 "description": "The language to search in.",
                 "default": "en",
             },
+            "extract": {
+                "type": "string",
+                "description": "What information to extract from the page. If not provided, the full page will be returned."
+            },
+            "summarize": {
+                "type": "boolean",
+                "description": "Whether to summarize the page or not.",
+                "default": False,
+            }
         },
         "required": ["query"],
     }
@@ -28,7 +37,8 @@ class Wikipedia(BaseTool):
             raise Exception('No query provided.')
 
         language = self.kwargs.get("language", "en")
-        extract = self.kwargs.get("extract", False)
+        extract = self.kwargs.get("extract")
+        summarize = self.kwargs.get("summarize", False)
 
         args = {
             "action": "query",
@@ -36,13 +46,8 @@ class Wikipedia(BaseTool):
             "titles": query,
         }
 
-        if extract:
-            args["prop"] = "extracts"
-            args["exintro"] = ""
-
-        else:
-            args["prop"] = "revisions"
-            args["rvprop"] = "content"
+        args["prop"] = "revisions"
+        args["rvprop"] = "content"
 
         url = f'https://{language}.wikipedia.org/w/api.php?{urlencode(args)}'
 
@@ -50,13 +55,25 @@ class Wikipedia(BaseTool):
             async with session.get(url) as response:
                 if response.status == 200:
                     data = await response.json()
-                    pages = data['query']['pages']
-                    page = list(pages.values())[0]
-                    if 'extract' in page:
-                        return f"**{page['title']} (Extract)**\n{page['extract']}"
-                    elif 'revisions' in page:
-                        return f"**{page['title']}**\n{page['revisions'][0]['*']}"
-                    else:
+
+                    try:
+                        pages = data['query']['pages']
+                        page = list(pages.values())[0]
+                        content = page['revisions'][0]['*']
+                    except KeyError:
                         raise Exception(f'No results for {query} found in Wikipedia.')
+
+                    if extract:
+                        chat_messages = [{"role": "system", "content": f"Extract the following from the provided content: {extract}"}]
+                        chat_messages.append({"role": "user", "content": content})
+                        content, _ = await self.bot.chat_api.generate_chat_response(chat_messages, room=self.room, user=self.user, allow_override=False, use_tools=False)
+
+                    if summarize:
+                        chat_messages = [{"role": "system", "content": "Summarize the following content:"}]
+                        chat_messages.append({"role": "user", "content": content})
+                        content, _ = await self.bot.chat_api.generate_chat_response(chat_messages, room=self.room, user=self.user, allow_override=False, use_tools=False)
+
+                    return f"**Wikipedia: {page['title']}**\n{content}"
+
                 else:
                     raise Exception(f'Could not connect to Wikipedia API: {response.status} {response.reason}')