2 anni fa · 50f3296892
--- a/Dockerfile
+++ b/Dockerfile
@@ -29,6 +29,6 @@ COPY --from=builder /build/models/ /app/models/
 
				 
			
 
				 VOLUME /data/
			
 
				 
			
 
				-ADD . /app/
			
 
				+ADD ./*.py /app/
			
 
				 
			
 
				 CMD ["python3", "-u", "main.py"]
			
--- a/main.py
+++ b/main.py
@@ -37,10 +37,16 @@ async def on_audio_message(room, event):
 
				     result = asr.transcribe(response.body)
			
 
				 
			
 
				     await bot.async_client.room_typing(room.machine_name, False)
			
 
				-    await bot.api.send_text_message(
			
 
				-      room_id=room.room_id,
			
 
				-      message=f"Transcription of {response.filename}: {result}",
			
 
				-      msgtype="m.notice")
			
 
				+    if response.filename:
			
 
				+      await bot.api.send_text_message(
			
 
				+        room_id=room.room_id,
			
 
				+        message=f"Transcription of {response.filename}: {result}",
			
 
				+        msgtype="m.notice")
			
 
				+    else:
			
 
				+      await bot.api.send_text_message(
			
 
				+        room_id=room.room_id,
			
 
				+        message=f"Transcription: {result}",
			
 
				+        msgtype="m.notice")
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				   bot.run()
			
--- a/speech_recognition.py
+++ b/speech_recognition.py
@@ -1,6 +1,7 @@
 
				 import ffmpeg
			
 
				 import subprocess
			
 
				-import tempfile
			
 
				+from itertools import takewhile
			
 
				+import os
			
 
				 
			
 
				 SAMPLE_RATE = 16000
			
 
				 
			
@@ -10,7 +11,7 @@ def convert_audio(data: bytes) -> bytes:
 
				     # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
			
 
				     out, _ = (
			
 
				       ffmpeg.input("pipe:", threads=0)
			
 
				-      .output("-", format="wav", acodec="pcm_s16le", ac=1, ar=SAMPLE_RATE)
			
 
				+      .output("audio.wav", format="wav", acodec="pcm_s16le", ac=1, ar=SAMPLE_RATE)
			
 
				       .run(cmd="ffmpeg", capture_stdout=True, capture_stderr=True, input=data)
			
 
				     )
			
 
				   except ffmpeg.Error as e:
			
@@ -23,14 +24,18 @@ class ASR():
 
				     self.model = model
			
 
				 
			
 
				   def transcribe(self, audio: bytes) -> str:
			
 
				-    audio = convert_audio(audio)
			
 
				-    with tempfile.NamedTemporaryFile("w+b") as file:
			
 
				-      file.write(audio)
			
 
				-      file.flush()
			
 
				-      stdout, stderr = subprocess.Popen(
			
 
				-          ["./main", "-m", f"models/ggml-{self.model}.bin", "-f", file.name], 
			
 
				-          stdout=subprocess.PIPE
			
 
				-        ).communicate()
			
 
				-      if stderr:
			
 
				-        print(stderr.decode())
			
 
				-    return stdout.decode()
			
 
				+    convert_audio(audio)
			
 
				+    stdout, stderr = subprocess.Popen(
			
 
				+        ["./main", "-m", f"models/ggml-{self.model}.bin", "-f", "audio.wav", "--no_timestamps"], 
			
 
				+        stdout=subprocess.PIPE
			
 
				+      ).communicate()
			
 
				+
			
 
				+    os.remove("audio.wav")
			
 
				+
			
 
				+    if stderr:
			
 
				+      print(stderr.decode())
			
 
				+
			
 
				+    lines = stdout.decode().splitlines()[23:]
			
 
				+    print('\n'.join(lines))
			
 
				+    text = takewhile(lambda x: x, lines)
			
 
				+    return '\n'.join(text)
			
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -1 +1 @@
 
				-Subproject commit 15b49e8baf495e62b65765ff3bd0437906b37680
			
 
				+Subproject commit 6d654d192a62e6cd9897d6ff683bdc97406827e9