Sfoglia il codice sorgente

fix audio file location

subDesTagesMitExtraKaese 2 anni fa
parent
commit
50f3296892
4 file modificati con 30 aggiunte e 19 eliminazioni
  1. 1 1
      Dockerfile
  2. 10 4
      main.py
  3. 18 13
      speech_recognition.py
  4. 1 1
      whisper.cpp

+ 1 - 1
Dockerfile

@@ -29,6 +29,6 @@ COPY --from=builder /build/models/ /app/models/
 
 VOLUME /data/
 
-ADD . /app/
+ADD ./*.py /app/
 
 CMD ["python3", "-u", "main.py"]

+ 10 - 4
main.py

@@ -37,10 +37,16 @@ async def on_audio_message(room, event):
     result = asr.transcribe(response.body)
 
     await bot.async_client.room_typing(room.machine_name, False)
-    await bot.api.send_text_message(
-      room_id=room.room_id,
-      message=f"Transcription of {response.filename}: {result}",
-      msgtype="m.notice")
+    if response.filename:
+      await bot.api.send_text_message(
+        room_id=room.room_id,
+        message=f"Transcription of {response.filename}: {result}",
+        msgtype="m.notice")
+    else:
+      await bot.api.send_text_message(
+        room_id=room.room_id,
+        message=f"Transcription: {result}",
+        msgtype="m.notice")
 
 if __name__ == "__main__":
   bot.run()

+ 18 - 13
speech_recognition.py

@@ -1,6 +1,7 @@
 import ffmpeg
 import subprocess
-import tempfile
+from itertools import takewhile
+import os
 
 SAMPLE_RATE = 16000
 
@@ -10,7 +11,7 @@ def convert_audio(data: bytes) -> bytes:
     # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
     out, _ = (
       ffmpeg.input("pipe:", threads=0)
-      .output("-", format="wav", acodec="pcm_s16le", ac=1, ar=SAMPLE_RATE)
+      .output("audio.wav", format="wav", acodec="pcm_s16le", ac=1, ar=SAMPLE_RATE)
       .run(cmd="ffmpeg", capture_stdout=True, capture_stderr=True, input=data)
     )
   except ffmpeg.Error as e:
@@ -23,14 +24,18 @@ class ASR():
     self.model = model
 
   def transcribe(self, audio: bytes) -> str:
-    audio = convert_audio(audio)
-    with tempfile.NamedTemporaryFile("w+b") as file:
-      file.write(audio)
-      file.flush()
-      stdout, stderr = subprocess.Popen(
-          ["./main", "-m", f"models/ggml-{self.model}.bin", "-f", file.name], 
-          stdout=subprocess.PIPE
-        ).communicate()
-      if stderr:
-        print(stderr.decode())
-    return stdout.decode()
+    convert_audio(audio)
+    stdout, stderr = subprocess.Popen(
+        ["./main", "-m", f"models/ggml-{self.model}.bin", "-f", "audio.wav", "--no_timestamps"], 
+        stdout=subprocess.PIPE
+      ).communicate()
+
+    os.remove("audio.wav")
+
+    if stderr:
+      print(stderr.decode())
+
+    lines = stdout.decode().splitlines()[23:]
+    print('\n'.join(lines))
+    text = takewhile(lambda x: x, lines)
+    return '\n'.join(text)

+ 1 - 1
whisper.cpp

@@ -1 +1 @@
-Subproject commit 15b49e8baf495e62b65765ff3bd0437906b37680
+Subproject commit 6d654d192a62e6cd9897d6ff683bdc97406827e9