18 Commits 013cc9f226 ... 067856f5eb

Author SHA1 Message Date
  subDesTagesMitExtraKaese 067856f5eb update build script to debian bookworm 1 month ago
  subDesTagesMitExtraKaese 377763ced2 allow loading external models 1 month ago
  subDesTagesMitExtraKaese 6aeb13ab96 remove preloaded images 1 month ago
  subDesTagesMitExtraKaese 670ec27f15 Revert "remove direct reference to matrix-nio" 1 month ago
  subDesTagesMitExtraKaese 6084187e14 fix call to smbl main 1 month ago
  subDesTagesMitExtraKaese 6e470c2a69 add aiohttp dependency 1 month ago
  subDesTagesMitExtraKaese ee554b4de2 Create LICENSE 1 month ago
  subDesTagesMitExtraKaese 8e0784ecca remove direct reference to matrix-nio 1 month ago
  subDesTagesMitExtraKaese bc5dd0b605 Merge remote-tracking branch 'origin/renovate/matrix-nio-0.x' 1 month ago
  subDesTagesMitExtraKaese 135d6b2c49 update build commands 1 month ago
  subDesTagesMitExtraKaese 3b255ce27e update models list 1 month ago
  subDesTagesMitExtraKaese 9b5326daaa update whisper.cpp 1 month ago
  Renovate Bot 7f85c0d107 Update dependency matrix-nio to v0.25.2 1 month ago
  Renovate Bot 49abc97416 Update dependency simplematrixbotlib to v2.12.3 1 month ago
  Renovate Bot 61d6fc7c27 Update dependency simplematrixbotlib to v2.12.2 2 months ago
  Renovate Bot 1a3cbe3da9 Update dependency simplematrixbotlib to v2.12.1 3 months ago
  Renovate Bot d238c06f8c Update python Docker tag to v3.13 5 months ago
  user 994fac4858 handle all errors without exiting 6 months ago
8 changed files with 120 additions and 149 deletions
  1. 1 81
      .github/workflows/docker.yml
  2. 22 24
      Dockerfile
  3. 21 0
      LICENSE
  4. 7 5
      README.md
  5. 21 8
      main.py
  6. 3 2
      requirements.txt
  7. 44 28
      speech_recognition.py
  8. 1 1
      whisper.cpp

+ 1 - 81
.github/workflows/docker.yml

@@ -34,84 +34,4 @@ jobs:
           context: .
           platforms: linux/amd64,linux/arm64/v8
           push: true
-          tags: ftcaplan/matrix-stt-bot:latest
-      -
-        name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64/v8
-          push: true
-          tags: ftcaplan/matrix-stt-bot:tiny
-          build-args: |
-            "PRELOAD_MODEL=tiny"
-      -
-        name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64/v8
-          push: true
-          tags: ftcaplan/matrix-stt-bot:tiny-q5_1
-          build-args: |
-            "PRELOAD_MODEL=tiny-q5_1"
-      -
-        name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64/v8
-          push: true
-          tags: ftcaplan/matrix-stt-bot:base
-          build-args: |
-            "PRELOAD_MODEL=base"
-      -
-        name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64/v8
-          push: true
-          tags: ftcaplan/matrix-stt-bot:base-q5_1
-          build-args: |
-            "PRELOAD_MODEL=base-q5_1"
-      -
-        name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64/v8
-          push: true
-          tags: ftcaplan/matrix-stt-bot:small
-          build-args: |
-            "PRELOAD_MODEL=small"
-      -
-        name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64/v8
-          push: true
-          tags: ftcaplan/matrix-stt-bot:small-q5_1
-          build-args: |
-            "PRELOAD_MODEL=small-q5_1"
-      -
-        name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64/v8
-          push: true
-          tags: ftcaplan/matrix-stt-bot:medium-q5_0
-          build-args: |
-            "PRELOAD_MODEL=medium-q5_0"
-      -
-        name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64/v8
-          push: true
-          tags: ftcaplan/matrix-stt-bot:large-q5_0
-          build-args: |
-            "PRELOAD_MODEL=large-q5_0"
+          tags: ftcaplan/matrix-stt-bot:latest

+ 22 - 24
Dockerfile

@@ -1,39 +1,37 @@
 # build image
-FROM debian:bullseye-slim AS builder
-WORKDIR /build/
-RUN apt-get update && apt-get install --no-install-recommends -y \
-    make gcc g++ wget \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
+FROM python:3.13-slim-bookworm AS builder
+WORKDIR /app/
+
+RUN apt-get update \
+ && apt-get install -y \
+    build-essential wget cmake git \
+    libolm-dev gcc g++ make libffi-dev
 
 # Install Whisper.cpp
-ADD whisper.cpp/ /build/
-RUN make
+ADD whisper.cpp/ .
+RUN cmake -B build && cmake --build build --config Release
+
+# Install dependencies
+ADD requirements.txt .
+RUN pip install --prefix="/python-libs" --no-warn-script-location -r requirements.txt
 
 # main image
-FROM python:3.12-slim-bullseye
+FROM python:3.13-slim-bookworm
 WORKDIR /app/
 
-# Install dependencies
+COPY --from=builder /python-libs /usr/local
+COPY --from=builder /usr/local/lib/libolm* /usr/local/lib/
+COPY --from=builder /app/build/bin/whisper-cli /app/build/src/libwhisper* /app/build/ggml/src/libggml* /app/
+
 RUN apt-get update && apt-get install -y \
-    ffmpeg libolm-dev gcc make wget\
+    ffmpeg wget \
  && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
-
-ADD requirements.txt .
-
-RUN pip install -r requirements.txt && \
-  apt-get remove -y gcc make && \
-  apt-get autoremove -y
-
-COPY --from=builder /build/main /app/
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* \
+ && ./whisper-cli --help > /dev/null
 
 VOLUME /data/
 
 ADD ./*.py /app/
-
-ARG PRELOAD_MODEL
-ENV PRELOAD_MODEL ${PRELOAD_MODEL}
-RUN if [ -n "$PRELOAD_MODEL" ]; then wget -nv "https://ggml.ggerganov.com/ggml-model-whisper-$PRELOAD_MODEL.bin"; fi
+ADD ./whisper.cpp/models/download-ggml-model.sh /app/
 
 CMD ["python3", "-u", "main.py"]

+ 21 - 0
LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 subDesTagesMitExtraKaese
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 7 - 5
README.md

@@ -27,14 +27,16 @@ services:
 ```
 
 ## Configuration
-The bot will download the model file on first run to reduce image size. Available models are `tiny.en`, `tiny`, `base.en`, `base`, `small.en`, `small`, `medium.en`, `medium`, and `large`. The default is `ASR_MODEL=tiny`.
 
-You can authenticate using tokens instead of a password by setting `LOGIN_TOKEN=<login-token>` or `ACCESS_TOKEN=<access-token>` instead of `PASSWORD=<password>`.
+- **ASR_MODEL**: You can choose a model by setting it with `ASR_MODEL`. 
+  
+  - Available models include, for example, `tiny.en`, `tiny`, `base`, `small`, `medium`, and `large-v3`. The full list is available on [Hugging Face](https://huggingface.co/ggerganov/whisper.cpp).
+  
+  - The default is `ASR_MODEL=tiny`. The bot will download the model file on first run to reduce image size.
 
-- **ASR_MODEL**: You can choose a docker tag with the corresponding model pre downloaded or set it with `ASR_MODEL`. Available models are `tiny.en`, `tiny`, `base.en`, `base`, `small.en`, `small`, `medium.en`, `medium`, and `large`. The default is `ASR_MODEL=tiny`.
+  - You can load your own ggml models by providing them at the following path: `/data/models/ggml-$ASR_MODEL.bin`
 
-- **Authentication**:
-  - You can authenticate using tokens instead of a password:
+- **Authentication**: You can authenticate using tokens instead of a password:
     - Set `LOGIN_TOKEN=<login-token>` or `ACCESS_TOKEN=<access-token>` instead of `PASSWORD=<password>`.
 
 - **Allowlist**:

+ 21 - 8
main.py

@@ -2,7 +2,9 @@
 from urllib.parse import urlparse
 import os
 import time
+import traceback
 import asyncio
+import aiohttp
 
 import simplematrixbotlib as botlib
 import nio
@@ -28,7 +30,7 @@ if 'ALLOWLIST' in os.environ:
 
 bot = botlib.Bot(creds, config)
 
-asr = ASR(os.getenv('ASR_MODEL', os.getenv('PRELOAD_MODEL', 'tiny')), os.getenv('ASR_LANGUAGE', 'en'))
+asr = ASR(os.getenv('ASR_MODEL', 'tiny'), os.getenv('ASR_LANGUAGE', 'en'))
 
 @bot.listener.on_custom_event(nio.RoomMessage)
 async def on_message(room, event):
@@ -85,11 +87,22 @@ async def on_message(room, event):
         }
       })
 
+async def main():
+    asr.load_model()
+    while True:
+        try:
+            await bot.main()
+        except (asyncio.exceptions.TimeoutError, aiohttp.ClientError) as e:
+            print(f"Network issue: {e}")
+            traceback.print_exc()
+            print("Network issue, restarting...")
+            await asyncio.sleep(5)
+        except Exception as e:
+            print(f"Unexpected error: {e}")
+            traceback.print_exc()
+            print("Unexpected error, restarting...")
+            await asyncio.sleep(5)
+
 if __name__ == "__main__":
-  asr.load_model()
-  try:
-    bot.run()
-  except asyncio.exceptions.TimeoutError as e:
-    print(e)
-    print("Timeout, restarting...")
-    time.sleep(5)
+    asyncio.run(main())
+

+ 3 - 2
requirements.txt

@@ -1,3 +1,4 @@
-simplematrixbotlib==2.12.0
+simplematrixbotlib==2.12.3
+aiohttp==3.11.13
 matrix-nio[e2e]==0.24.0
-ffmpeg-python
+ffmpeg-python==0.2.0

+ 44 - 28
speech_recognition.py

@@ -28,44 +28,60 @@ def convert_audio(data: bytes, out_filename: str):
   return out
 
 MODELS = [
-  "tiny.en", 
-  "tiny.en-q5_1", 
-  "tiny", 
-  "tiny-q5_1", 
-  "base.en", 
-  "base.en-q5_1", 
-  "base", 
-  "base-q5_1", 
-  "small.en", 
-  "small.en-q5_1", 
-  "small", 
+  "tiny",
+  "tiny.en",
+  "tiny-q5_1",
+  "tiny.en-q5_1",
+  "tiny-q8_0",
+  "base",
+  "base.en",
+  "base-q5_1",
+  "base.en-q5_1",
+  "base-q8_0",
+  "small",
+  "small.en",
+  "small.en-tdrz",
   "small-q5_1",
-  "medium.en-q5_0", 
-  "medium-q5_0", 
-  "large-q5_0"
+  "small.en-q5_1",
+  "small-q8_0",
+  "medium",
+  "medium.en",
+  "medium-q5_0",
+  "medium.en-q5_0",
+  "medium-q8_0",
+  "large-v1",
+  "large-v2",
+  "large-v2-q5_0",
+  "large-v2-q8_0",
+  "large-v3",
+  "large-v3-q5_0",
+  "large-v3-turbo",
+  "large-v3-turbo-q5_0",
+  "large-v3-turbo-q8_0",
 ]
 
 class ASR():
   def __init__(self, model = "tiny", language = "en"):
-    if model not in MODELS:
+    if os.path.exists(f"/app/ggml-{model}.bin"):
+      self.model_path = f"/app"
+    else:
+      self.model_path = f"/data/models"
+      if not os.path.exists(self.model_path):
+        os.mkdir(self.model_path)
+
+    file_path = f"{self.model_path}/ggml-{model}.bin"
+    if not os.path.exists(file_path) and model not in MODELS:
       raise ValueError(f"Invalid model: {model}. Must be one of {MODELS}")
+
     self.model = model
     self.language = language
-
-    if os.path.exists(f"/app/ggml-model-whisper-{model}.bin"):
-      self.model_path = f"/app/ggml-model-whisper-{model}.bin"
-    else:
-      self.model_path = f"/data/models/ggml-{model}.bin"
-      if not os.path.exists("/data/models"):
-        os.mkdir("/data/models")
-        
-    self.model_url = f"https://ggml.ggerganov.com/ggml-model-whisper-{self.model}.bin"
+    self.file_path = file_path
     self.lock = asyncio.Lock()
 
   def load_model(self):
-    if not os.path.exists(self.model_path) or os.path.getsize(self.model_path) == 0:
+    if not os.path.exists(self.file_path) or os.path.getsize(self.file_path) == 0:
       print("Downloading model...")
-      subprocess.run(["wget", "-nv", self.model_url, "-O", self.model_path], check=True)
+      subprocess.run(["./download-ggml-model.sh", self.model, self.model_path], check=True)
       print("Done.")
 
   async def transcribe(self, audio: bytes) -> str:
@@ -73,8 +89,8 @@ class ASR():
     convert_audio(audio, filename)
     async with self.lock:
       proc = await asyncio.create_subprocess_exec(
-          "./main",
-          "-m", self.model_path,
+          "./whisper-cli",
+          "-m", f"{self.model_path}/ggml-{self.model}.bin",
           "-l", self.language,
           "-f", filename,
           "-nt",

+ 1 - 1
whisper.cpp

@@ -1 +1 @@
-Subproject commit fff24a0148fe194df4997a738eeceddd724959c3
+Subproject commit fc7b1ee521739942dbe70ec2d2565b90fa0118cb