2 anos atrás · 26f403c679
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
 
				+[submodule "whisper.cpp"]
			
 
				+	path = whisper.cpp
			
 
				+	url = https://github.com/ggerganov/whisper.cpp.git
			
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
 
				-FROM python:3.9-bullseye
			
 
				+FROM python:3.9-slim-bullseye
			
 
				 WORKDIR /app/
			
 
				 
			
 
				 # Install dependencies
			
@@ -7,24 +7,19 @@ RUN apt-get update && apt-get install -y \
 
				  && apt-get clean \
			
 
				  && rm -rf /var/lib/apt/lists/*
			
 
				 
			
 
				-# Install Whisper
			
 
				-RUN pip install git+https://github.com/openai/whisper.git
			
 
				-
			
 
				-# Install model files
			
 
				-RUN whisper --model tiny dummy.wav; exit 0
			
 
				-#RUN whisper --model base dummy.wav; exit 0
			
 
				-#RUN whisper --model small dummy.wav; exit 0
			
 
				-#RUN whisper --model medium dummy.wav; exit 0
			
 
				-#RUN whisper --model large dummy.wav; exit 0
			
 
				-#RUN whisper --model tiny.en dummy.wav; exit 0
			
 
				-#RUN whisper --model base.en dummy.wav; exit 0
			
 
				-#RUN whisper --model small.en dummy.wav; exit 0
			
 
				-#RUN whisper --model medium.en dummy.wav; exit 0
			
 
				-
			
 
				-ADD requirements.txt /app/
			
 
				+ADD requirements.txt .
			
 
				 
			
 
				 RUN pip install -r requirements.txt
			
 
				 
			
 
				+# Build whisper.cpp in place, keep only the binary and models, then drop the sources
			
 
				+COPY whisper.cpp/ whisper.cpp/
			
 
				+RUN cd whisper.cpp && \
			
 
				+    make tiny && \
			
 
				+    cp main ../whisper && \
			
 
				+    cp -r models/ .. && \
			
 
				+    cd .. && \
			
 
				+    rm -rf whisper.cpp/
			
 
				+
			
 
				 VOLUME /data/
			
 
				 
			
 
				 ADD . /app/
			
--- a/speech_recognition.py
+++ b/speech_recognition.py
@@ -1,10 +1,11 @@
 
				-import whisper
			
 
				 import ffmpeg
			
 
				+import subprocess
			
 
				+import tempfile
			
 
				 import numpy as np
			
 
				 
			
 
				 SAMPLE_RATE = 16000
			
 
				 
			
 
				-def load_audio(data: bytes):
			
 
				+def convert_audio(data: bytes) -> bytes:
			
 
				   try:
			
 
				     # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
			
 
				     # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
			
@@ -16,12 +17,21 @@ def load_audio(data: bytes):
 
				   except ffmpeg.Error as e:
			
 
				     raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
			
 
				 
			
 
				-  return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
			
 
				+  return out
			
 
				 
			
 
				 class ASR():
			
 
				   def __init__(self, model = "tiny"):
			
 
				-    self.model = whisper.load_model(model)
			
 
				+    self.model = model
			
 
				 
			
 
				-  def transcribe(self, audio: bytes):
			
 
				-    audio = load_audio(audio)
			
 
				-    return self.model.transcribe(audio)
			
 
				+  def transcribe(self, audio: bytes) -> str:  # returns the decoded stdout of the ./whisper CLI run on `audio`
			
 
				+    audio = convert_audio(audio)
			
 
				+    with tempfile.NamedTemporaryFile("w+b") as file:
			
 
				+      file.write(audio)
			
 
				+      file.flush()  # push buffered bytes out before the child process opens file.name
			
 
				+      stdout, stderr = subprocess.Popen(
			
 
				+          ["./whisper", "-m", f"models/ggml-{self.model}.bin", "-f", file.name],
			
 
				+          stdout=subprocess.PIPE, stderr=subprocess.PIPE  # without a stderr pipe communicate() yields None for it
			
 
				+        ).communicate()
			
 
				+      if stderr:
			
 
				+        print(stderr.decode())
			
 
				+    return stdout.decode()
			
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -0,0 +1 @@
 
				+Subproject commit 15b49e8baf495e62b65765ff3bd0437906b37680
		`@@ -0,0 +1 @@`
		`+Subproject commit 15b49e8baf495e62b65765ff3bd0437906b37680`