subDesTagesMitExtraKaese hai 2 anos
pai
achega
651156a057
Modificáronse 2 ficheiros con 33 adicións e 8 borrados
  1. 6 8
      main.py
  2. 27 0
      speech_recognition.py

+ 6 - 8
main.py

@@ -1,13 +1,11 @@
 #!/usr/bin/env python3
 from urllib.parse import urlparse
-import tempfile
 import os
 
-import whisper
 import simplematrixbotlib as botlib
 import nio
 
-model = whisper.load_model("tiny")
+from speech_recognition import ASR
 
 creds = botlib.Creds(
   homeserver=os.environ['HOMESERVER'],
@@ -25,6 +23,8 @@ config.ignore_unverified_devices = True
 config.store_path = '/data/crypto_store/'
 bot = botlib.Bot(creds, config)
 
+asr = ASR(os.getenv('ASR_MODEL', 'tiny'))
+
 @bot.listener.on_custom_event(nio.RoomMessageAudio)
 async def on_audio_message(room, event):
   print(room.machine_name, event.sender, event.body, event.url)
@@ -34,10 +34,7 @@ async def on_audio_message(room, event):
     url = urlparse(event.url)
     response = await bot.async_client.download(server_name=url.netloc, media_id=url.path[1:])
     print(response)
-    with tempfile.NamedTemporaryFile("w+b") as file:
-      file.write(response.body)
-      file.flush()
-      result = model.transcribe(file.name)
+    result = asr.transcribe(response.body)
 
     await bot.async_client.room_typing(room.machine_name, False)
     await bot.api.send_text_message(
@@ -45,4 +42,5 @@ async def on_audio_message(room, event):
       message=f"Transcription of {response.filename}: {result['text']}",
       msgtype="m.notice")
 
-bot.run()
+if __name__ == "__main__":
+  bot.run()

+ 27 - 0
speech_recognition.py

@@ -0,0 +1,27 @@
+import whisper
+import ffmpeg
+import numpy as np
+
+SAMPLE_RATE = 16000
+
def load_audio(data: bytes):
  """Decode arbitrary audio bytes into a mono float32 waveform.

  Pipes *data* through the ffmpeg CLI (down-mixed to one channel,
  16-bit PCM, resampled to SAMPLE_RATE) and scales the samples into
  the range [-1.0, 1.0).

  Requires the ffmpeg CLI and the `ffmpeg-python` package.
  Raises RuntimeError if ffmpeg cannot decode the input.
  """
  decoder = ffmpeg.input("pipe:", threads=0)
  decoder = decoder.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=SAMPLE_RATE)
  try:
    # Launches ffmpeg as a subprocess, feeding `data` on stdin and
    # capturing the raw PCM stream from stdout.
    pcm, _ = decoder.run(cmd="ffmpeg", capture_stdout=True, capture_stderr=True, input=data)
  except ffmpeg.Error as e:
    raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

  samples = np.frombuffer(pcm, np.int16).flatten()
  # 32768 == 2**15, the full-scale magnitude of signed 16-bit PCM.
  return samples.astype(np.float32) / 32768.0
+
class ASR:
  """Thin wrapper around a Whisper model for transcribing raw audio bytes."""

  def __init__(self, model = "tiny"):
    # Load the Whisper checkpoint once; the instance is reused for
    # every subsequent transcription.
    self.model = whisper.load_model(model)

  def transcribe(self, audio: bytes):
    """Decode *audio* via ffmpeg and return Whisper's transcription result."""
    waveform = load_audio(audio)
    return self.model.transcribe(waveform)