瀏覽代碼

load models on demand

subDesTagesMitExtraKaese 2 年之前
父節點
當前提交
aedddaf812
共有 4 個文件被更改,包括 18 次插入和 5 次刪除
  1. 2 4
      Dockerfile
  2. 1 0
      docker-compose.yml.sample
  3. 1 0
      main.py
  4. 14 1
      speech_recognition.py

+ 2 - 4
Dockerfile

@@ -10,22 +10,20 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
 ADD whisper.cpp/ /build/
 RUN gcc -pthread -O3 -march=native -c ggml.c && \
     g++ -pthread -O3 -std=c++11 -c main.cpp && \
-    g++ -pthread -o main ggml.o main.o && \
-    ./download-ggml-model.sh tiny
+    g++ -pthread -o main ggml.o main.o
 
 # main image
 FROM alpine
 WORKDIR /app/
 
 # Install dependencies
-RUN apk add ffmpeg py3-olm py3-matrix-nio py3-pip py3-pillow gcompat
+RUN apk add ffmpeg py3-olm py3-matrix-nio py3-pip py3-pillow gcompat wget
 
 ADD requirements.txt .
 
 RUN pip install -r requirements.txt
 
 COPY --from=builder /build/main /app/
-COPY --from=builder /build/models/ /app/models/
 
 VOLUME /data/
 

+ 1 - 0
docker-compose.yml.sample

@@ -9,4 +9,5 @@ services:
       - "HOMESERVER=https://matrix.example.com"
       - "USERNAME=@stt-bot:example.com"
       - "PASSWORD=<password>"
+      - "ASR_MODEL=tiny"
       

+ 1 - 0
main.py

@@ -49,4 +49,5 @@ async def on_audio_message(room, event):
         msgtype="m.notice")
 
 if __name__ == "__main__":
+  asr.load_model()
   bot.run()

+ 14 - 1
speech_recognition.py

@@ -19,14 +19,27 @@ def convert_audio(data: bytes) -> bytes:
 
   return out
 
+MODELS = ["tiny.en", "tiny", "base.en", "base", "small.en", "small", "medium.en", "medium", "large"]
+
 class ASR():
   def __init__(self, model = "tiny"):
+    if model not in MODELS:
+      raise ValueError(f"Invalid model: {model}. Must be one of {MODELS}")
     self.model = model
+    os.mkdir("/data/models")
+    self.model_path = f"/data/models/ggml-{model}.bin"
+    self.model_url = f"https://ggml.ggerganov.com/ggml-model-whisper-{self.model}.bin"
+
+  def load_model(self):
+    if not os.path.exists(self.model_path):
+      print("Downloading model...")
+      subprocess.run(["wget", self.model_url, "-O", self.model_path], check=True)
+      print("Done.")
 
   def transcribe(self, audio: bytes) -> str:
     convert_audio(audio)
     stdout, stderr = subprocess.Popen(
-        ["./main", "-m", f"models/ggml-{self.model}.bin", "-f", "audio.wav", "--no_timestamps"], 
+        ["./main", "-m", self.model_path, "-f", "audio.wav", "--no_timestamps"], 
         stdout=subprocess.PIPE
       ).communicate()