Selaa lähdekoodia

Add Docker image variants with a preloaded Whisper model (tiny, small, medium, large)

subDesTagesMitExtraKaese 2 vuotta sitten
vanhempi
commit
7aa1076802
3 muutettua tiedostoa jossa 51 lisäystä ja 3 poistoa
  1. 40 0
      .github/workflows/docker.yml
  2. 4 0
      Dockerfile
  3. 7 3
      speech_recognition.py

+ 40 - 0
.github/workflows/docker.yml

@@ -35,3 +35,43 @@ jobs:
           platforms: linux/amd64,linux/arm64
           push: true
           tags: ftcaplan/matrix-stt-bot:latest
+      -
+        name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ftcaplan/matrix-stt-bot:tiny
+          build-args: |
+            "PRELOAD_MODEL=tiny"
+      -
+        name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ftcaplan/matrix-stt-bot:small
+          build-args: |
+            "PRELOAD_MODEL=small"
+      -
+        name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ftcaplan/matrix-stt-bot:medium
+          build-args: |
+            "PRELOAD_MODEL=medium"
+      -
+        name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ftcaplan/matrix-stt-bot:large
+          build-args: |
+            "PRELOAD_MODEL=large"

+ 4 - 0
Dockerfile

@@ -32,4 +32,8 @@ VOLUME /data/
 
 ADD ./*.py /app/
 
+ARG PRELOAD_MODEL
+ENV PRELOAD_MODEL ${PRELOAD_MODEL}
+RUN if [ -n "$PRELOAD_MODEL" ]; then wget "https://ggml.ggerganov.com/ggml-model-whisper-$PRELOAD_MODEL.bin"; fi
+
 CMD ["python3", "-u", "main.py"]

+ 7 - 3
speech_recognition.py

@@ -3,6 +3,7 @@ import ffmpeg
 import asyncio
 import subprocess
 import os
+import shutil
 
 SAMPLE_RATE = 16000
 
@@ -43,9 +44,12 @@ class ASR():
     self.lock = asyncio.Lock()
 
   def load_model(self):
-    if not os.path.exists(self.model_path):
-      print("Downloading model...")
-      subprocess.run(["wget", self.model_url, "-O", self.model_path], check=True)
+    if not os.path.exists(self.model_path) or os.path.getsize(self.model_path) == 0:
+      print("Fetching model...")
+      if os.path.exists(f"ggml-model-whisper-{self.model}.bin"):
+        shutil.copy(f"ggml-model-whisper-{self.model}.bin", self.model_path)
+      else:
+        subprocess.run(["wget", self.model_url, "-O", self.model_path], check=True)
       print("Done.")
 
   async def transcribe(self, audio: bytes) -> str: