bot.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689
  1. import markdown2
  2. import duckdb
  3. import tiktoken
  4. import magic
  5. import asyncio
  6. from PIL import Image
  7. from nio import (
  8. AsyncClient,
  9. AsyncClientConfig,
  10. WhoamiResponse,
  11. DevicesResponse,
  12. Event,
  13. Response,
  14. MatrixRoom,
  15. Api,
  16. RoomMessagesError,
  17. MegolmEvent,
  18. GroupEncryptionError,
  19. EncryptionError,
  20. RoomMessageText,
  21. RoomSendResponse,
  22. SyncResponse,
  23. RoomMessageNotice,
  24. JoinError,
  25. RoomLeaveError,
  26. RoomSendError,
  27. )
  28. from nio.crypto import Olm
  29. from typing import Optional, List, Dict, Tuple
  30. from configparser import ConfigParser
  31. from datetime import datetime
  32. from io import BytesIO
  33. import uuid
  34. import traceback
  35. from .logging import Logger
  36. from migrations import migrate
  37. from callbacks import RESPONSE_CALLBACKS, EVENT_CALLBACKS
  38. from commands import COMMANDS
  39. from .store import DuckDBStore
  40. from .openai import OpenAI
  41. from .wolframalpha import WolframAlpha
  42. from .trackingmore import TrackingMore
  class GPTBot:
      """Matrix bot that relays room conversations to the configured chat API.

      The class-level attributes below are defaults; `from_config` overrides
      them on the instance it builds.
      """

      # Default values
      database: Optional[duckdb.DuckDBPyConnection] = None
      default_room_name: str = "GPTBot"  # Default name of rooms created by the bot
      default_system_message: str = "You are a helpful assistant."
      # Force default system message to be included even if a custom room message is set
      force_system_message: bool = False
      max_tokens: int = 3000  # Maximum number of input tokens
      max_messages: int = 30  # Maximum number of messages to consider as input
      matrix_client: Optional[AsyncClient] = None
      sync_token: Optional[str] = None
      logger: Optional[Logger] = Logger()
      chat_api: Optional[OpenAI] = None
      image_api: Optional[OpenAI] = None
      classification_api: Optional[OpenAI] = None
      # from_config only assigns this when a [WolframAlpha] section exists, so a
      # default is needed to keep the attribute readable on every instance.
      calculation_api: Optional[WolframAlpha] = None
      parcel_api: Optional[TrackingMore] = None
      operator: Optional[str] = None
      room_ignore_list: List[str] = []  # List of rooms to ignore invites from
      debug: bool = False

      def __init__(self):
          """Give the instance its own copy of the room ignore list.

          The list is appended to at runtime; without a per-instance copy every
          append would mutate the single list stored on the class.
          """
          self.room_ignore_list = list(type(self).room_ignore_list)
  @classmethod
  def from_config(cls, config: ConfigParser):
      """Build a GPTBot from a parsed configuration.

      Args:
          config (ConfigParser): Parsed configuration. The "OpenAI" and
              "Matrix" sections are required; "Database", "GPTBot",
              "WolframAlpha" and "TrackingMore" are optional.

      Returns:
          GPTBot: The configured bot instance.

      Raises:
          KeyError: If a required section or key is missing.
          AssertionError: If the "Matrix" section is missing.
      """
      bot = cls()

      # Without a configured path the bot starts with no database connection.
      has_database_path = "Database" in config and "Path" in config["Database"]
      bot.database = duckdb.connect(
          config["Database"]["Path"]) if has_database_path else None

      # General settings fall back to the current defaults key by key.
      if "GPTBot" in config:
          general = config["GPTBot"]
          bot.operator = general.get("Operator", bot.operator)
          bot.default_room_name = general.get(
              "DefaultRoomName", bot.default_room_name)
          bot.default_system_message = general.get(
              "SystemMessage", bot.default_system_message)
          bot.force_system_message = general.getboolean(
              "ForceSystemMessage", bot.force_system_message)
          bot.debug = general.getboolean("Debug", bot.debug)

      # One OpenAI client object backs chat, image and classification calls.
      openai_section = config["OpenAI"]
      shared_api = OpenAI(
          openai_section["APIKey"], openai_section.get("Model"), bot.logger)
      bot.chat_api = shared_api
      bot.image_api = shared_api
      bot.classification_api = shared_api
      bot.max_tokens = openai_section.getint("MaxTokens", bot.max_tokens)
      bot.max_messages = openai_section.getint(
          "MaxMessages", bot.max_messages)

      # Optional integrations
      if "WolframAlpha" in config:
          bot.calculation_api = WolframAlpha(
              config["WolframAlpha"]["APIKey"], bot.logger)

      if "TrackingMore" in config:
          bot.parcel_api = TrackingMore(
              config["TrackingMore"]["APIKey"], bot.logger)

      # Matrix client: the access token is mandatory, user and device ID are not.
      assert "Matrix" in config, "Matrix config not found"
      matrix_section = config["Matrix"]
      bot.matrix_client = AsyncClient(matrix_section["Homeserver"])
      client = bot.matrix_client
      client.access_token = matrix_section["AccessToken"]
      client.user_id = matrix_section.get("UserID")
      client.device_id = matrix_section.get("DeviceID")

      return bot
  async def _get_user_id(self) -> str:
      """Return the bot's Matrix user ID, asking the homeserver if unknown.

      The ID already stored on the client is returned as-is. Otherwise the
      whoami endpoint is queried, which requires an access token.

      Returns:
          str: The user ID of the bot.

      Raises:
          Exception: If the whoami request does not yield a WhoamiResponse.
      """
      assert self.matrix_client, "Matrix client not set up"

      known_id = self.matrix_client.user_id
      if known_id:
          return known_id

      assert self.matrix_client.access_token, "Access token not set up"

      whoami = await self.matrix_client.whoami()
      if not isinstance(whoami, WhoamiResponse):
          raise Exception(f"Could not get user ID: {whoami}")

      return whoami.user_id
  async def _last_n_messages(self, room: str | MatrixRoom, n: Optional[int] = None):
      """Collect up to `n` recent text messages from a room.

      Twice as many events as requested are fetched because events that
      cannot be decrypted, are not text/notice messages, or are commands for
      other bots are skipped. Collection stops early at a
      "!gptbot ignoreolder" marker.

      Args:
          room (str | MatrixRoom): The room, or its ID.
          n (int, optional): Maximum number of messages to return. Defaults
              to `self.max_messages`.

      Returns:
          list: The collected message events in chronological order.

      Raises:
          Exception: If the homeserver answers with a RoomMessagesError.
      """
      # The original referenced an undefined global `bot` here.
      n = n or self.max_messages
      room_id = room.room_id if isinstance(room, MatrixRoom) else room

      self.logger.log(
          f"Fetching last {2*n} messages from room {room_id} (starting at {self.sync_token})...")

      response = await self.matrix_client.room_messages(
          room_id=room_id,
          start=self.sync_token,
          limit=2*n,
      )

      if isinstance(response, RoomMessagesError):
          raise Exception(
              f"Error fetching messages: {response.message} (status code {response.status_code})")

      messages = []

      for event in response.chunk:
          if len(messages) >= n:
              break

          if isinstance(event, MegolmEvent):
              try:
                  event = await self.matrix_client.decrypt_event(event)
              except (GroupEncryptionError, EncryptionError):
                  self.logger.log(
                      f"Could not decrypt message {event.event_id} in room {room_id}", "error")
                  continue

          if isinstance(event, (RoomMessageText, RoomMessageNotice)):
              # Everything older than this marker is deliberately left out.
              if event.body.startswith("!gptbot ignoreolder"):
                  break
              # Keep plain messages and our own commands; skip other "!" commands.
              if (not event.body.startswith("!")) or (event.body.startswith("!gptbot")):
                  messages.append(event)

      self.logger.log(f"Found {len(messages)} messages (limit: {n})")

      # Reverse the list so that messages are in chronological order
      return messages[::-1]
  def _truncate(self, messages: list, max_tokens: Optional[int] = None,
                model: Optional[str] = None, system_message: Optional[str] = None):
      """Drop the oldest messages until the conversation fits a token budget.

      The first entry of `messages` is always kept in front (it is expected to
      be the system prompt); the remaining entries are kept newest-first until
      the budget is used up, then returned in their original order.

      Args:
          messages (list): Chat messages as dicts with a "content" key.
          max_tokens (int, optional): Token budget. Defaults to `self.max_tokens`.
          model (str, optional): Model whose tokenizer is used. Defaults to
              `self.chat_api.chat_model`.
          system_message (str, optional): System prompt to reserve room for.
              Defaults to `self.default_system_message`.

      Returns:
          list: The messages that fit, or an empty list if there is no input
          or not even the system prompt / first message fits.
      """
      max_tokens = max_tokens or self.max_tokens
      model = model or self.chat_api.chat_model
      system_message = self.default_system_message if system_message is None else system_message

      encoding = tiktoken.encoding_for_model(model)

      def count_tokens(text):
          # One extra token per message, as in the original accounting.
          return len(encoding.encode(text)) + 1

      system_message_tokens = count_tokens(system_message) if system_message else 0

      if system_message_tokens > max_tokens:
          self.logger.log(
              f"System message is too long to fit within token limit ({system_message_tokens} tokens) - cannot proceed", "error")
          return []

      if not messages:
          return []

      # Budget used so far, in tokens. The original overwrote this with the
      # character length of the system message.
      total_tokens = system_message_tokens

      first, rest = messages[0], messages[1:]

      # When the first entry is the very system prompt whose tokens were
      # reserved above, charging it again would waste budget.
      already_reserved = bool(system_message) and (
          first.get("role") == "system" and first["content"] == system_message)

      if not already_reserved:
          first_tokens = count_tokens(first["content"])
          if total_tokens + first_tokens > max_tokens:
              return []
          total_tokens += first_tokens

      # Walk from newest to oldest so the most recent context survives.
      kept = []
      for message in reversed(rest):
          tokens = count_tokens(message["content"])
          if total_tokens + tokens > max_tokens:
              break
          total_tokens += tokens
          kept.append(message)

      kept.reverse()
      return [first] + kept
  async def _get_device_id(self) -> str:
      """Guess the bot's device ID.

      The ID stored on the client wins. Otherwise the account's device list is
      requested (requires an access token) and the first entry is used. If the
      request does not yield a DevicesResponse, the unset value is returned
      unchanged.

      Returns:
          str: The guessed device ID.
      """
      assert self.matrix_client, "Matrix client not set up"

      current = self.matrix_client.device_id
      if current:
          return current

      assert self.matrix_client.access_token, "Access token not set up"

      listing = await self.matrix_client.devices()
      if isinstance(listing, DevicesResponse):
          return listing.devices[0].id

      return current
  async def process_command(self, room: MatrixRoom, event: RoomMessageText):
      """Dispatch a bot command to its handler.

      The second whitespace-separated word of the message selects the handler
      from COMMANDS; a missing or unknown word falls back to COMMANDS[None].

      Args:
          room (MatrixRoom): The room the command was sent in.
          event (RoomMessageText): The message event carrying the command.
      """
      self.logger.log(
          f"Received command {event.body} from {event.sender} in room {room.room_id}")

      words = event.body.split()
      command = words[1] if len(words) > 1 else None

      handler = COMMANDS.get(command, COMMANDS[None])
      await handler(room, event, self)
  def room_uses_classification(self, room: MatrixRoom | str) -> bool:
      """Tell whether message classification is switched on for a room.

      Reads the "use_classification" entry from the room_settings table.

      Args:
          room (MatrixRoom | str): The room, or its ID.

      Returns:
          bool: The stored setting, or False when the room has none.
      """
      if isinstance(room, MatrixRoom):
          room_id = room.room_id
      else:
          room_id = room

      query = "SELECT value FROM room_settings WHERE room_id = ? AND setting = ?"

      with self.database.cursor() as cursor:
          cursor.execute(query, (room_id, "use_classification"))
          row = cursor.fetchone()

      if not row:
          return False

      # The value is converted through int before being read as a flag.
      return bool(int(row[0]))
  async def _event_callback(self, room: MatrixRoom, event: Event):
      """Run every registered handler whose event type matches `event`.

      A failing handler stops the dispatch. The error is logged and, in debug
      mode, also posted to the room as a notice together with the traceback.

      Args:
          room (MatrixRoom): The room the event belongs to.
          event (Event): The received event.
      """
      self.logger.log("Received event: " + str(event.event_id), "debug")

      try:
          for handled_type, handler in EVENT_CALLBACKS.items():
              if not isinstance(event, handled_type):
                  continue
              await handler(room, event, self)
      except Exception as e:
          self.logger.log(
              f"Error in event callback for {event.__class__}: {e}", "error")

          if self.debug:
              await self.send_message(room, f"Error: {e}\n\n```\n{traceback.format_exc()}\n```", True)
  async def event_callback(self, room: MatrixRoom, event: Event):
      """Handle an event in a background task so the caller is not blocked.

      Args:
          room (MatrixRoom): The room the event belongs to.
          event (Event): The received event.
      """
      # The event loop only keeps weak references to tasks, so a strong
      # reference is held until the task finishes; otherwise it could be
      # garbage-collected mid-execution.
      pending = getattr(self, "_background_tasks", None)
      if pending is None:
          pending = self._background_tasks = set()

      task = asyncio.create_task(self._event_callback(room, event))
      pending.add(task)
      task.add_done_callback(pending.discard)
  def room_uses_timing(self, room: MatrixRoom):
      """Tell whether timing is switched on for a room.

      Reads the "use_timing" entry from the room_settings table.

      Args:
          room (MatrixRoom): The room to check.

      Returns:
          bool: The stored setting, or False when the room has none.
      """
      room_id = room.room_id
      query = "SELECT value FROM room_settings WHERE room_id = ? AND setting = ?"

      with self.database.cursor() as cursor:
          cursor.execute(query, (room_id, "use_timing"))
          row = cursor.fetchone()

      if not row:
          return False

      # The value is converted through int before being read as a flag.
      return bool(int(row[0]))
  async def _response_callback(self, response: Response):
      """Run every registered handler whose response type matches `response`.

      Args:
          response (Response): The response received from the homeserver.
      """
      for handled_type, handler in RESPONSE_CALLBACKS.items():
          if not isinstance(response, handled_type):
              continue
          await handler(response, self)
  async def response_callback(self, response: Response):
      """Handle a response in a background task so the caller is not blocked.

      Args:
          response (Response): The response received from the homeserver.
      """
      # The event loop only keeps weak references to tasks, so a strong
      # reference is held until the task finishes; otherwise it could be
      # garbage-collected mid-execution.
      pending = getattr(self, "_background_tasks", None)
      if pending is None:
          pending = self._background_tasks = set()

      task = asyncio.create_task(self._response_callback(response))
      pending.add(task)
      task.add_done_callback(pending.discard)
  async def accept_pending_invites(self):
      """Try to join every room the bot has a pending invite for.

      Rooms on the ignore list are skipped. When joining fails the invite is
      rejected by leaving the room; if that fails as well, the room is put on
      the ignore list so the stuck invite is not retried.
      """
      assert self.matrix_client, "Matrix client not set up"

      # Iterate over a snapshot: this coroutine awaits inside the loop, and the
      # client's invite mapping can change while it is suspended.
      for invite in list(self.matrix_client.invited_rooms):
          if invite in self.room_ignore_list:
              self.logger.log(
                  f"Ignoring invite to room {invite} (room is in ignore list)")
              continue

          self.logger.log(f"Accepting invite to room {invite}")

          response = await self.matrix_client.join(invite)

          if isinstance(response, JoinError):
              self.logger.log(
                  f"Error joining room {invite}: {response.message}. Not trying again.", "error")

              leave_response = await self.matrix_client.room_leave(invite)

              if isinstance(leave_response, RoomLeaveError):
                  self.logger.log(
                      f"Error leaving room {invite}: {leave_response.message}", "error")
                  self.room_ignore_list.append(invite)
  async def send_image(self, room: MatrixRoom, image: bytes, message: Optional[str] = None):
      """Upload an image and post it to a room as an m.image message.

      Args:
          room (MatrixRoom): The room to send the image to.
          image (bytes): The encoded image data.
          message (str, optional): Text used as the message body. Defaults to None.

      Raises:
          Exception: If the upload does not return a content URI.
      """
      self.logger.log(
          f"Sending image of size {len(image)} bytes to room {room.room_id}")

      # Pillow is only used to read the format and dimensions for the metadata.
      bio = BytesIO(image)
      img = Image.open(bio)
      mime = Image.MIME[img.format]

      (width, height) = img.size

      self.logger.log(
          f"Uploading - Image size: {width}x{height} pixels, MIME type: {mime}")

      # Rewind so the upload starts at the first byte again.
      bio.seek(0)

      response, _ = await self.matrix_client.upload(
          bio,
          content_type=mime,
          filename="image",
          filesize=len(image)
      )

      # A failed upload has no content URI; fail with a clear message instead
      # of an AttributeError while building the event below.
      content_uri = getattr(response, "content_uri", None)
      if not content_uri:
          raise Exception(f"Could not upload image: {response}")

      self.logger.log("Uploaded image - sending message...")

      content = {
          "body": message or "",
          "info": {
              "mimetype": mime,
              "size": len(image),
              "w": width,
              "h": height,
          },
          "msgtype": "m.image",
          "url": content_uri
      }

      status = await self.matrix_client.room_send(
          room.room_id,
          "m.room.message",
          content
      )

      self.logger.log(str(status), "debug")

      self.logger.log("Sent image")
  async def send_message(self, room: MatrixRoom | str, message: str, notice: bool = False):
      """Send a markdown message to a room, encrypted where the room requires it.

      Args:
          room (MatrixRoom | str): The room, or its ID, to send the message to.
          message (str): The markdown message to send.
          notice (bool): Whether to send the message as a notice. Defaults to False.

      Raises:
          KeyError: If `room` is an ID the client does not know.
          Exception: Whatever the encryption step raised; the message is not
              sent in that case.
      """
      if isinstance(room, str):
          room = self.matrix_client.rooms[room]

      markdowner = markdown2.Markdown(extras=["fenced-code-blocks"])
      formatted_body = markdowner.convert(message)

      msgtype = "m.notice" if notice else "m.text"

      msgcontent = {"msgtype": msgtype, "body": message,
                    "format": "org.matrix.custom.html", "formatted_body": formatted_body}

      content = None

      if self.matrix_client.olm and room.encrypted:
          try:
              if not room.members_synced:
                  await self.matrix_client.joined_members(room.room_id)

              if self.matrix_client.olm.should_share_group_session(room.room_id):
                  try:
                      # A key share for this room is already in progress: wait for it.
                      event = self.matrix_client.sharing_session[room.room_id]
                      await event.wait()
                  except KeyError:
                      await self.matrix_client.share_group_session(
                          room.room_id,
                          ignore_unverified_devices=True,
                      )

              # From here on msgtype is the event type of the encrypted payload.
              msgtype, content = self.matrix_client.encrypt(
                  room.room_id, "m.room.message", msgcontent)

          except Exception as e:
              # The error is re-raised rather than falling back to plaintext,
              # so the log must not claim the message goes out unencrypted.
              self.logger.log(
                  f"Error encrypting message: {e} - message not sent", "error")
              raise

      if not content:
          msgtype = "m.room.message"
          content = msgcontent

      method, path, data = Api.room_send(
          self.matrix_client.access_token, room.room_id, msgtype, content, uuid.uuid4()
      )

      response = await self.matrix_client._send(RoomSendResponse, method, path, data, (room.room_id,))

      if isinstance(response, RoomSendError):
          self.logger.log(
              f"Error sending message: {response.message}", "error")
  def log_api_usage(self, message: Event | str, room: MatrixRoom | str, api: str, tokens: int):
      """Record one API call in the token_usage table.

      Does nothing when the bot has no database connection.

      Args:
          message (Event | str): The triggering event, or its ID.
          room (MatrixRoom | str): The room the event was sent in, or its ID.
          api (str): The API that was used.
          tokens (int): The number of tokens used.
      """
      if not self.database:
          return

      # Only the IDs are stored, so objects are reduced to them first.
      message_id = message.event_id if isinstance(message, Event) else message
      room_id = room.room_id if isinstance(room, MatrixRoom) else room

      row = (message_id, room_id, tokens, api, datetime.now())
      self.database.execute(
          "INSERT INTO token_usage (message_id, room_id, tokens, api, timestamp) VALUES (?, ?, ?, ?, ?)",
          row
      )
  async def run(self):
      """Start the bot and keep syncing until the sync loop ends.

      Resolves missing user/device IDs, prepares the database (falling back to
      an in-memory one), runs migrations, configures the Matrix client, runs
      an initial sync and then enters the sync loop. With an in-memory
      database, encryption is disabled and no store is attached.
      """
      # Set up the Matrix client
      assert self.matrix_client, "Matrix client not set up"
      assert self.matrix_client.access_token, "Access token not set up"

      if not self.matrix_client.user_id:
          self.matrix_client.user_id = await self._get_user_id()

      if not self.matrix_client.device_id:
          self.matrix_client.device_id = await self._get_device_id()

      # Set up database
      in_memory = not self.database
      if in_memory:
          self.logger.log(
              "No database connection set up, using in-memory database. Data will be lost on bot shutdown.")
          # The original called a bare DuckDBPyConnection, which this module
          # never imports; duckdb.connect is the constructor used in from_config.
          self.database = duckdb.connect(":memory:")

      self.logger.log("Running migrations...")
      before, after = migrate(self.database)

      if before != after:
          self.logger.log(f"Migrated from version {before} to {after}.")
      else:
          self.logger.log(f"Already at latest version {after}.")

      if in_memory:
          # No persistent store, hence no encryption.
          self.matrix_client.config = AsyncClientConfig(
              store_sync_tokens=True, encryption_enabled=False)
      else:
          matrix_store = DuckDBStore

          self.matrix_client.config = AsyncClientConfig(
              store_sync_tokens=True, encryption_enabled=True, store=matrix_store)

          # Store and Olm are only set up here, where matrix_store is defined.
          self.matrix_client.store = matrix_store(
              self.matrix_client.user_id,
              self.matrix_client.device_id,
              self.database
          )

          self.matrix_client.olm = Olm(
              self.matrix_client.user_id,
              self.matrix_client.device_id,
              self.matrix_client.store
          )

          self.matrix_client.encrypted_rooms = self.matrix_client.store.load_encrypted_rooms()

      # Run initial sync (now includes joining rooms)
      sync = await self.matrix_client.sync(timeout=30000)
      if isinstance(sync, SyncResponse):
          await self.response_callback(sync)
      else:
          self.logger.log(f"Initial sync failed, aborting: {sync}", "error")
          return

      # Set up callbacks
      self.matrix_client.add_event_callback(self.event_callback, Event)
      self.matrix_client.add_response_callback(
          self.response_callback, Response)

      # Start syncing events
      self.logger.log("Starting sync loop...")
      try:
          await self.matrix_client.sync_forever(timeout=30000)
      finally:
          self.logger.log("Syncing one last time...")
          await self.matrix_client.sync(timeout=30000)
  def respond_to_room_messages(self, room: MatrixRoom | str) -> bool:
      """Tell whether the bot answers every message sent in a room.

      Reads the "always_reply" entry from the room_settings table.

      Args:
          room (MatrixRoom | str): The room, or its ID.

      Returns:
          bool: The stored setting, or True when the room has none.
      """
      room_id = room.room_id if isinstance(room, MatrixRoom) else room
      query = "SELECT value FROM room_settings WHERE room_id = ? AND setting = ?"

      with self.database.cursor() as cursor:
          cursor.execute(query, (room_id, "always_reply"))
          row = cursor.fetchone()

      if row:
          # The value is converted through int before being read as a flag.
          return bool(int(row[0]))

      # Rooms without an explicit setting get replies to everything.
      return True
  async def process_query(self, room: MatrixRoom, event: RoomMessageText, from_chat_command: bool = False):
      """Process a query message. Generates a response and sends it to the room.

      The message is ignored unless it came from the chat command, the room
      is set to always reply, or the bot's user ID appears in the body. If
      classification is enabled and yields anything but "chat", the message
      is rewritten into a bot command and handed to `process_command`.

      Args:
          room (MatrixRoom): The room the message was sent in.
          event (RoomMessageText): The event that triggered the query.
          from_chat_command (bool, optional): Whether the query was sent via the `!gptbot chat` command. Defaults to False.
      """
      if not (from_chat_command or self.respond_to_room_messages(room) or self.matrix_client.user_id in event.body):
          return

      await self.matrix_client.room_typing(room.room_id, True)

      # The finally clause clears the typing indicator on every exit path;
      # the original left it on at each early return.
      try:
          await self.matrix_client.room_read_markers(room.room_id, event.event_id)

          if (not from_chat_command) and self.room_uses_classification(room):
              classification, tokens = self.classification_api.classify_message(
                  event.body, room.room_id)

              self.log_api_usage(
                  event, room, f"{self.classification_api.api_code}-{self.classification_api.classification_api}", tokens)

              if not classification["type"] == "chat":
                  event.body = f"!gptbot {classification['type']} {classification['prompt']}"
                  await self.process_command(room, event)
                  return

          try:
              last_messages = await self._last_n_messages(room.room_id, 20)
          except Exception as e:
              self.logger.log(f"Error getting last messages: {e}", "error")
              await self.send_message(
                  room, "Something went wrong. Please try again.", True)
              return

          system_message = self.get_system_message(room)

          chat_messages = [{"role": "system", "content": system_message}]

          for message in last_messages:
              role = "assistant" if message.sender == self.matrix_client.user_id else "user"
              # The triggering event is appended last, so skip it here.
              if not message.event_id == event.event_id:
                  chat_messages.append({"role": role, "content": message.body})

          chat_messages.append({"role": "user", "content": event.body})

          # Truncate messages to fit within the token limit
          truncated_messages = self._truncate(
              chat_messages, self.max_tokens - 1, system_message=system_message)

          if not truncated_messages:
              self.logger.log(
                  "No messages fit within the token limit", "error")
              await self.send_message(
                  room, "Something went wrong. Please try again.", True)
              return

          try:
              # Send the truncated conversation; the original passed the
              # untruncated list, which made the truncation a no-op.
              response, tokens_used = self.chat_api.generate_chat_response(
                  truncated_messages, user=room.room_id)
          except Exception as e:
              self.logger.log(f"Error generating response: {e}", "error")
              await self.send_message(
                  room, "Something went wrong. Please try again.", True)
              return

          if response:
              self.log_api_usage(
                  event, room, f"{self.chat_api.api_code}-{self.chat_api.chat_api}", tokens_used)

              self.logger.log(f"Sending response to room {room.room_id}...")

              # send_message converts the markdown to HTML
              await self.send_message(room, response)
          else:
              # Send a notice to the room if there was an error
              self.logger.log("Didn't get a response from GPT API", "error")
              await self.send_message(
                  room, "Something went wrong. Please try again.", True)
      finally:
          await self.matrix_client.room_typing(room.room_id, False)
  def get_system_message(self, room: MatrixRoom | str) -> str:
      """Assemble the system message that applies to a room.

      A room-specific message is read from the room_settings table. The
      default message is used when the room has none, or is prepended to the
      room's message when `force_system_message` is set.

      Args:
          room (MatrixRoom | str): The room, or its ID.

      Returns:
          str: The system message, stripped of surrounding whitespace.
      """
      room_id = room if isinstance(room, str) else room.room_id

      with self.database.cursor() as cur:
          cur.execute(
              "SELECT value FROM room_settings WHERE room_id = ? AND setting = ?",
              (room_id, "system_message")
          )
          row = cur.fetchone()

      # The default goes first, unless a room message replaces it.
      if (not row) or self.force_system_message:
          prefix = self.default_system_message
      else:
          prefix = ""

      if row:
          suffix = "\n\n" + row[0]
      else:
          suffix = ""

      return (prefix + suffix).strip()
  def __del__(self):
      """Close the Matrix client and the database connection.

      The database is closed even when closing the client fails.
      """
      try:
          if self.matrix_client:
              # asyncio.run() raises when called while a loop is running, so
              # in that case the close is scheduled on the running loop.
              try:
                  loop = asyncio.get_running_loop()
              except RuntimeError:
                  loop = None

              if loop is None:
                  asyncio.run(self.matrix_client.close())
              else:
                  loop.create_task(self.matrix_client.close())
      except Exception as e:
          self.logger.log(f"Error closing Matrix client: {e}", "error")
      finally:
          if self.database:
              self.database.close()