From 26858ce65cee72b710a67b2fb5a6792d165f7c3c Mon Sep 17 00:00:00 2001 From: YASoftwareDev Date: Wed, 25 Mar 2026 23:24:35 +0100 Subject: [PATCH 1/5] release v3.2.8 --- .codespellrc | 3 + .dockerignore | 9 + .github/workflows/test.yml | 64 +++ .gitignore | 23 +- .gitmodules | 2 +- CHANGELOG.md | 28 +- Dockerfile | 42 +- README.md | 363 ++++++++----- docker-entrypoint.sh | 9 + docker/README.md | 24 - docker/{wav => audio}/.gitkeep | 0 docker/run.sh | 20 + docker/run_tts_client_python.sh | 19 - install.sh | 40 ++ mypy.ini | 30 ++ pyproject.toml | 5 + pytest.ini | 4 + requirements.txt | 6 - setup.py | 128 +++-- setup.sh | 14 +- tests/__init__.py | 0 tests/conftest.py | 64 +++ tests/test_channel.py | 120 +++++ tests/test_cli.py | 149 +++++ tests/test_helpers.py | 178 ++++++ tests/test_integration.py | 57 ++ tests/test_wave_utils.py | 158 ++++++ tox.ini | 40 ++ tts_client_python/VERSION.py | 2 +- tts_client_python/general.py | 664 +++++++++++++++-------- tts_client_python/lexicons.py | 199 ++++--- tts_client_python/recordings.py | 272 +++++++--- tts_client_python/tts_client.py | 924 +++++++++++++++++++++++++------- tts_client_python/wave_utils.py | 42 ++ 34 files changed, 2878 insertions(+), 824 deletions(-) create mode 100644 .codespellrc create mode 100644 .dockerignore create mode 100644 .github/workflows/test.yml create mode 100755 docker-entrypoint.sh delete mode 100644 docker/README.md rename docker/{wav => audio}/.gitkeep (100%) create mode 100755 docker/run.sh delete mode 100755 docker/run_tts_client_python.sh create mode 100755 install.sh create mode 100644 mypy.ini create mode 100644 pyproject.toml create mode 100644 pytest.ini delete mode 100644 requirements.txt create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_channel.py create mode 100644 tests/test_cli.py create mode 100644 tests/test_helpers.py create mode 100644 tests/test_integration.py create mode 100644 tests/test_wave_utils.py create mode 100644 
tox.ini create mode 100644 tts_client_python/wave_utils.py diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 0000000..191456d --- /dev/null +++ b/.codespellrc @@ -0,0 +1,3 @@ +[codespell] +# Ignore Polish language words that codespell flags as misspellings +ignore-words-list = tekst,numer,Tekst diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..db20ea7 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,9 @@ +build/ +**/*.egg-info/ +**/.eggs/ +.git/ +.git* +**/.gitlab/ +**/__pycache__/ +**/.venv/ +**/.vscode/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..0a8e14d --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,64 @@ +name: Tests + +on: + push: + branches: ["main", "master"] + pull_request: + +jobs: + test: + name: Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install system dependencies + run: sudo apt-get install -y libportaudio2 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + + - name: Generate proto stubs + run: | + uv venv .venv + source .venv/bin/activate + uv pip install --quiet "grpcio-tools>=1.70.0,<1.71.0" + PYTHONPATH=. 
python setup.py build_grpc + + - name: Run tests via tox + run: | + TOXENV="py$(echo '${{ matrix.python-version }}' | tr -d '.')" + uvx --with "tox-uv>=1" tox -e "${TOXENV}" + + test-py314: + name: Python 3.14 (allowed failure) + runs-on: ubuntu-latest + continue-on-error: true + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install system dependencies + run: sudo apt-get install -y libportaudio2 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + + - name: Generate proto stubs + run: | + uv venv .venv + source .venv/bin/activate + uv pip install --quiet "grpcio-tools>=1.70.0,<1.71.0" + PYTHONPATH=. python setup.py build_grpc + + - name: Run tests via tox + run: uvx --with "tox-uv>=1" tox -e py314 diff --git a/.gitignore b/.gitignore index a9108f0..b5b3527 100644 --- a/.gitignore +++ b/.gitignore @@ -59,22 +59,33 @@ instance/ .ipynb_checkpoints # Environments +.env .venv +env/ venv/ ENV/ +env.bak/ venv.bak/ -# proto sources -tts_client_python/proto/* +# audio files +*.wav +*.ogg -# ignore files in directory `docker/wav` -docker/wav/* +# ignore files in directory `audio` +docker/audio/* -# ignore files in directory `docker/tls` +# ignore files in directory `tls` docker/tls/* -# ignore files in directory `docker/txt` +# ignore files in directory `txt` docker/txt/* +# ignore files in directory 'proto' +tts_client_python/proto/* + # but keep the directories with .gitkeep file !/**/.gitkeep + +# generated by setup.sh from internal pre-commit repo — not versioned +.pre-commit-config.yaml +pre-commit/ diff --git a/.gitmodules b/.gitmodules index c6736db..56a2afa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "submodules/tts-service-api"] path = submodules/tts-service-api - url = https://github.com/techmo-pl/tts-service-api.git + url = https://github.com/techmo-pl/tts-service-api diff --git a/CHANGELOG.md b/CHANGELOG.md index 26277e0..7fe0f5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,28 @@ # Techmo 
TTS gRPC Python client Changelog -**Note** - this project was previously developed in the github repository: https://github.com/techmo-pl/tts-client -To find the older versions of the application or check the history of changes, use the old repository. -The changelog below only describes the changes made within the new repository. +## [3.2.8] - 2026-03-25 +### Fixed + +- `tts_client_python/tts_client.py`: legal header corrected from "Techmo ASR Client" to "Techmo TTS Client". +- `README.md`: removed non-existent `-v` short flag from `--print-service-version` option table. +- `tests/conftest.py`: removed dead `asr_service_address` fixture (no test uses it). +- `pytest.ini`: removed dead `asr` marker and `not asr` from `addopts`. +- `tox.ini`: removed `ASR_*` from `passenv` (no ASR tests exist). + + +## [3.2.7] - 2026-03-23 -## [3.0.0] - 2022-07-07 ### Added -- Handling new proto messages for TTS Service API 3.0.0 + +- `install.sh`: check for `uv` before use and print install instructions. +- `install.sh`: check for uninitialised `tts-service-api` submodule at startup. +- `install.sh`: warn about missing `libportaudio2` after install completes. +- `README.md`: add `uv` to prerequisites with canonical install command. + +### Fixed + +- `setup.py`: proto generation now raises a clear `FileNotFoundError` when the + submodule is absent instead of a bare path error. +- `tests/conftest.py`: print `libportaudio2` install hint to stderr at session + start instead of relying on pytest's end-of-session warnings summary. 
diff --git a/Dockerfile b/Dockerfile index 9a62450..46c1909 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,16 @@ -FROM python:3.6-slim +FROM python:3.8-slim AS build-stage -LABEL maintainer="" ARG DEBIAN_FRONTEND=noninteractive +ENV PIP_ROOT_USER_ACTION=ignore -ADD ./tts_client_python /tts_client/tts_client_python -ADD ./requirements.txt setup.py README.md /tts_client/ +COPY submodules/tts-service-api /tts-client-python/submodules/tts-service-api +COPY tts_client_python /tts-client-python/tts_client_python +COPY setup.py pyproject.toml README.md /tts-client-python/ -WORKDIR /tts_client +WORKDIR /tts-client-python +# hadolint ignore=DL3008 RUN apt-get update \ - && apt-get dist-upgrade -y \ && apt-get install -y --no-install-recommends \ build-essential \ libportaudio2 \ @@ -17,8 +18,29 @@ RUN apt-get update \ python3-dev \ && apt-get clean \ && rm -fr /var/lib/apt/lists/* \ - && rm -fr /var/cache/apt/* \ - && pip3 install -r requirements.txt \ - && pip install -e . + && rm -fr /var/cache/apt/* -ENTRYPOINT ["python3", "tts_client_python/tts_client.py"] +# hadolint ignore=DL3013 +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir . + + +FROM python:3.8-slim + +LABEL maintainer="Techmo sp. z o.o. " + +# hadolint ignore=DL3008 +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + libportaudio2 \ + && apt-get clean \ + && rm -fr /var/lib/apt/lists/* \ + && rm -fr /var/cache/apt/* + +COPY --from=build-stage /usr/local/lib/python3.8/site-packages /usr/local/lib/python3.8/site-packages +COPY --from=build-stage /tts-client-python/tts_client_python /tts-client-python/tts_client_python + +WORKDIR /tts-client-python + +COPY ./docker-entrypoint.sh / +ENTRYPOINT ["/docker-entrypoint.sh"] diff --git a/README.md b/README.md index 645cbe8..4c1fd95 100644 --- a/README.md +++ b/README.md @@ -1,209 +1,274 @@ -# Python implementation of Techmo TTS gRPC client. 
+# Python implementation of Techmo TTS gRPC client -The `tts-client-python` can be used on the docker (a dedicated script to handle the docker image: `docker/run_tts_client_python.sh` runs in the bash shell) or directly as a python application (requires configuring the virtual environment first). Details are described below. +* [TLDR](#tldr) +* [Docker usage](#docker-usage) +* [Local instance usage](#local-instance-usage) -## Docker usage -### Build docker image +## TLDR + +### a) Docker + +Simple synthesis: +``` +./docker/run.sh --service-address SERVICE_HOST:SERVICE_PORT --text "Polski tekst do syntezy" +``` +Output file: +``` +file docker/audio/output.wav +``` -To prepare a docker image with Python implementation of the TTS Client, open the project's main directory and run following command: +### b) Local instance ``` -docker build -f Dockerfile -t techmo-tts-client-python:3.0.0 . +./setup.sh +./install.sh +source .venv/bin/activate ``` -The build process will take several minutes. -When the build process is complete, you will receive a message: +Simple synthesis: ``` -Successfully tagged techmo-tts-client-python:3.0.0 +python3 tts_client_python/tts_client.py --service-address "SERVICE_HOST:SERVICE_PORT" --text "Polski tekst do syntezy" +``` +For more examples, see the [Sample queries](#sample-queries) section. + +Output file: ``` +file output.wav +``` + + +## Docker usage + +The easiest way to run the application is to use a ready-made docker image. If the image is not available locally, you can build it manually. +To use the tts-client-python on a Docker container, open terminal in the tts-client-python/docker directory and launch `run.sh` script. -### Run TTS Client +To send a simple request to the TTS service, execute the following command from the docker directory: +``` +./run.sh --service-address=IP_ADDRESS:PORT --text="Sample text to be synthesised" +``` +The generated audio file will appear in the `tts-client-python/docker/audio` directory. 
-To use the TTS Client on a Docker container, go to the `tts-client-python/docker` directory and run `run_tts_client_python.sh` script. -To send a simple request to the TTS service, use: +To synthesize speech from a text file, place it in the `tts-client-python/docker/txt` directory and execute the command: ``` -./run_tts_client_python.sh --service-address=IP_ADDRESS:PORT --text="Sample text to be synthesised" +./run.sh --service-address=IP_ADDRESS:PORT --input-path /tts_client/txt/FILE_NAME.txt ``` -To print the list of available options, use: +To use an encrypted connection, place the tls certificates received from the service owner in the `tts-client-python/docker/tls` directory and execute the command: ``` -./run_tts_client_python.sh --help +./run.sh --service-address=IP_ADDRESS:PORT --text="Sample text to be synthesised" --tls-dir /tts_client/tls ``` -Output audio files will be created inside `tts-client-python/docker/wav` directory. -Source text files should be placed inside `tts-client-python/docker/txt` directory, if used. -**NOTE:** Unlike a local TTS Client instance, the `run_tts_client_python.sh` script doesn't allow to set custom paths to the input/output files. Instead it uses predefined directories (`wav` and `txt`). When using options: `--input-text-file (-i)` and `--output-file (-o)`, user should only provide filenames. +To print a complete list of available options, use: +``` +./run.sh --help +``` +For more information on building and using the docker image, see `doc/dev-guide.md` file. ## Local instance usage -### Before run +### Dependencies + +- Python >=3.8 +- `uv` Python package manager: `curl -LsSf https://astral.sh/uv/install.sh | sh` +- Required Linux system-level packages: + - python3-dev + - python3-pip + - libportaudio2 + +Other required dependencies will be installed automatically in the virtual environment. 
+ +#### Python version compatibility + +| Python | Supported | Notes | +|--------|-----------|-------| +| 3.14 | ✅ | Tested (release candidate) | +| 3.13 | ✅ | | +| 3.12 | ✅ | | +| 3.11 | ✅ | | +| 3.10 | ✅ | | +| 3.9 | ✅ | | +| 3.8 | ✅ | Uses `grpcio<1.71.0` and `protobuf<6.0.0` (see below) | +| 3.7 | ❌ | Not supported (see below) | +| <3.7 | ❌ | Not supported | + +**Python 3.8 — lower dependency bounds** + +`grpcio 1.71.0` dropped Python 3.8 support, and `protobuf 6.x` requires Python 3.9+. +On Python 3.8 pip/uv will automatically select the compatible versions via PEP 508 environment +markers in `install_requires`: + +| Package | Python 3.9+ | Python 3.8 | +|---------|-------------|------------| +| `grpcio` | `>=1.70.0,<2.0.0` | `>=1.70.0,<1.71.0` | +| `protobuf` | `>=5.29.0` | `>=5.29.0,<6.0.0` | + +**Why Python 3.7 is not supported** -#### Dependencies - Linux +Three independent blockers make Python 3.7 support impractical: -Supported Python versions are: 3.6, 3.7, 3.8, 3.9. +1. **`grpcio`** — support dropped in `grpcio 1.63.0`; maximum usable version is `1.62.3`. +2. **`protobuf`** — `4.x` and `5.x` require Python 3.8+; maximum version available for + Python 3.7 is `3.20.3`. +3. **Proto stubs** — the generated `*_pb2.py` files import + `google.protobuf.runtime_version` (introduced in `protobuf 5.26.0`), which does not + exist in `protobuf 3.20.3`. This causes an `ImportError` at startup and cannot be + worked around without maintaining a separate set of legacy stubs generated with an + entirely different protobuf code-generator (the old `_descriptor`-based API). -Required Linux system-level packages: +Python 3.7 also reached end-of-life in June 2023. -- python3-dev -- python3-pip -- libportaudio2 +For more information on requirements and setup, see `doc/dev-guide.md` file. 
-To create the virtual environment and install other requirements, use script: +### Setup +To initialise submodules and install pre-commit hooks, run once after cloning: ``` ./setup.sh ``` -Then activate virtual environment: +To create a virtual environment and install the package: +``` +./install.sh +``` + +Before running the application, activate the virtual environment: ``` source .venv/bin/activate ``` -#### Dependencies - Windows +### Run -Supported Python versions are: 3.6, 3.7, 3.8, 3.9. +To send a simple request to the TTS service, execute the command: +``` +python3 tts_client_python/tts_client.py --service-address=IP_ADDRESS:PORT --text="Sample text to be synthesised" +``` -To create virtual environment and install dependencies temporarily change the PowerShell's execution policy to allow scripting. Start the PowerShell with `Run as Administrator` and use command: +**Available options:** + +| Option | Description | +|------------------------------------------------------------------|--------------| +| -h, --help | Shows this help message and exits. | +| --print-service-version | Prints version string of the service.| +| --print-resources-id | Prints identification string of the resources used by the service.| +| -s IP:PORT, --service-address IP:PORT | An IP address and port (address:port) of a service the client connects to.| +| -t TEXT, --text TEXT | A text to be synthesized. Each synthesis request must provide the input text using option `--text` (from the command line) or `--input-path` (from a text file).| +| -i INPUT_FILE, --input-path INPUT_FILE | A file with text to be synthesized. 
Each synthesis request must provide the input text using option `--text` (from the command line) or `--input-path` (from a text file).| +| -o OUT_PATH, --out-path OUT_PATH | A path to output audio file with synthesized speech content.| +| -l LANGUAGE_CODE, --language-code LANGUAGE_CODE | Language ISO 639-1 code of the voice to be used (optional, can be overridden by SSML).| +| -r RESPONSE_TYPE, --response RESPONSE_TYPE | Sets the type of response. Allowed values: "streaming" (default) or "single". According to the set response type, the streaming or non-streaming version of the Synthesize call is used.| +| --tls-dir TLS_DIR | If set to a path to the directory containing SSL/TLS files (client.crt, client.key, ca.crt), uses SSL/TLS authentication (required for both one-way and mutual authentication). If not set, uses insecure connection.| +| --play | Plays synthesized audio. Works only with pcm16 (default) encoding.| +| --session-id SESSION_ID | A session ID to be passed to the service. If not specified, the service generates a default session ID based on the timestamp of the request (in the form of: 'YYYYMMDD-hhmmss-xxx', where 'xxx' is the counter of sessions handled during the indicated second).| +| --grpc-timeout GRPC_TIMEOUT | A timeout in milliseconds used to set gRPC deadline - how long the client is willing to wait for a reply from the server (optional).| +| --sampling-rate SAMPLING_RATE | A sampling rate in Hz of synthesized audio. 
Set to 0 (default) to use voice's native sampling rate.| +| --ae ENCODING, --audio-encoding ENCODING | An encoding of the output audio, pcm16 (default), ogg-vorbis, ogg-opus, a-law, or mu-law.| +| --speech-pitch SPEECH_PITCH | Allows adjusting the default pitch of the synthesized speech (optional, can be overridden by SSML).| +| --speech-range SPEECH_RANGE | Allows adjusting the default range of the synthesized speech (optional, can be overridden by SSML).| +| --speech-rate SPEECH_RATE | Allows adjusting the default rate (speed) of the synthesized speech (optional, can be overridden by SSML).| +| --speech-stress SPEECH_STRESS | Allows adjusting the default stress of the synthesized speech (optional, can be overridden by SSML).| +| --speech-volume SPEECH_VOLUME | Allows adjusting the default volume of the synthesized speech (optional, can be overridden by SSML).| +| --list-voices | Lists all available voices.| +| --vn VOICE_NAME, --voice-name VOICE_NAME | A name of the voice to be used (optional, can be overridden by SSML).| +| --vg VOICE_GENDER, --voice-gender VOICE_GENDER | A gender of the voice to be used. Allowed values: 'female', 'male' (optional, can be overridden by SSML).| +| --va VOICE_AGE, --voice-age VOICE_AGE | An age of the voice to be used. Allowed values: 'adult', 'child', 'senile' (optional, can be overridden by SSML).| +| --voice-variant VOICE_VARIANT | A variant of the selected voice - positive integer (optional, can be overridden by SSML). Default value is 1.| +| --list-sound-icons | Lists all available sound icons for the requested voice. This request requires also arguments: --voice-name and --language, and may optionally specify --voice-variant (if not specified, the default variant (1) is used).| +| --list-recordings | Lists all available recordings for the requested voice. 
This request requires also arguments: --voice-name and --language, and may optionally specify --voice-variant (if not specified, the default variant (1) is used).| +| --get-recording RECORDING_KEY OUTPUT_PATH | Sends back the recording with the requested key for the requested voice in the linear PCM16 format. This request requires also arguments: --voice-name and --language, and may optionally specify --voice-variant (if not specified, the default variant (1) is used).| +| --put-recording RECORDING_KEY AUDIO_PATH | Adds a new recording with the requested key for the requested voice, or overwrites the existing one if there is already such a key defined. The recording has to be PCM16 WAV audio. This request requires also arguments: --voice-name and --language, and may optionally specify --voice-variant (if not specified, the default variant (1) is used).| +| --delete-recording RECORDING_KEY | Removes the recording with the requested key from the list of recordings of the requested voice. This request requires also arguments: --voice-name and --language, and may optionally specify --voice-variant (if not specified, the default variant (1) is used).| +| --list-lexicons | Lists all available pronunciation lexicons.| +| --get-lexicon LEXICON_URI OUTPUT_PATH | Saves content of the lexicon from the service-wide list of lexicon.| +| --put-lexicon LEXICON_URI LEXICON_PATH OUTSIDE_LOOKUP_BEHAVIOUR | Adds lexicon to the service-wide list of lexicons. LEXICON_URI - a custom string identifying a given lexicon at the service level. LEXICON_PATH - path to the lexicon file. OUTSIDE_LOOKUP_BEHAVIOUR - determines whether the service can use the lexicon automatically, without using the SSML tag. 
Must take one of two values: 'allowed' or 'disallowed'.| +| --delete-lexicon LEXICON_URI | Deletes lexicon from the service-wide list of lexicon.| + + +#### Sample queries + +Simple synthesis: +``` +python3 tts_client_python/tts_client.py --service-address "SERVICE_HOST:SERVICE_PORT" --text 'Polski tekst do syntezy' +``` +Simple synthesis with language selection: ``` -Set-ExecutionPolicy RemoteSigned +python3 tts_client_python/tts_client.py --service-address "SERVICE_HOST:SERVICE_PORT" --text 'Die Sprache der Synthese muss definiert werden.' --language-code "de" ``` -then confirm your choice. -Use Python 3 with virtual environment and install required packages (supported Python versions are: 3.6, 3.7, 3.8, 3.9): +SSML - simple prosody manipulation: +``` +python3 tts_client_python/tts_client.py --service-address "SERVICE_HOST:SERVICE_PORT" --text 'Mówię powoli i z krótkimi przerwami.' +``` +SSML - interpretation control (``): ``` -python3 -m venv .venv -.\.venv\Scripts\activate -pip install -r requirements.txt +python3 tts_client_python/tts_client.py --service-address "SERVICE_HOST:SERVICE_PORT" --text ' Twój numer to: AWS123 ' ``` -To switch back PowerShell's execution policy to the default, use command: +SSML - use of `format`: +``` +python3 tts_client_python/tts_client.py --service-address "SERVICE_HOST:SERVICE_PORT" --text ' Jest już 16:45 ' +``` +SSML - use of `detail`: ``` -Set-ExecutionPolicy Restricted +python3 tts_client_python/tts_client.py --service-address "SERVICE_HOST:SERVICE_PORT" --text ' Ali Baba i 40 rozbójników' ``` -#### Proto sources +### Running tests locally -To build the sources from `.proto`, run: -``` -./make_proto.sh +Proto stubs (`tts_client_python/proto/*_pb2.py`) are **not committed** to git. 
+Run the following once after cloning, then activate the venv and run tests: + +```bash +./setup.sh +./install.sh +source .venv/bin/activate +pytest ``` +> **Note on scripts:** `setup.sh` is a one-time bootstrapper (submodules + +> pre-commit hooks). `install.sh [VENV_PATH]` creates the virtualenv and +> installs the package — run it separately after `setup.sh`, or re-run it +> whenever you need to refresh the Python environment. -### Run +> **Multi-version testing** (optional): to run the full matrix across Python +> 3.8–3.14 (mirrors CI), run `uvx --with "tox-uv>=1" tox` instead of `pytest` +> after the setup script above. + +### Integration tests (require a live TTS service) + +Integration tests connect to a real TTS service and verify end-to-end audio synthesis. +They are excluded from the default `pytest` run and must be enabled explicitly via environment variables. + +| Variable | Required | Description | +|---|---|---| +| `TTS_SERVICE_ADDRESS` | Yes | `host:port` of a live TTS service | +| `TTS_VOICE_NAME` | No | Voice name to use (uses service default if unset) | +| `TTS_LANGUAGE_CODE` | No | ISO 639-1 language code (uses service default if unset) | + +Run integration tests locally: -To run the TTS Client, activate the virtual environment first: -- On Linux: +```bash +TTS_SERVICE_ADDRESS=host:port pytest -m integration ``` -source .venv/bin/activate + +With an explicit voice: + +```bash +TTS_SERVICE_ADDRESS=host:port TTS_VOICE_NAME=masza TTS_LANGUAGE_CODE=pl pytest -m integration ``` -- On Windows: -``` -.\.venv\Scripts\activate -``` -Then run TTS Client. Sample use: - -``` -python tts_client.py -s "192.168.1.1:4321" -f 44100 -t "Some text to be synthesized" -``` - -For each request you have to provide the service address and the input text (directly as argument's value or from text file). 
- - -## Usage: -``` -Basic usage: tts_client.py --service-address ADDRESS --text INPUT_TEXT -``` - -Available options: -``` - -h, --help show this help message and exit - -s IP:PORT, --service-address IP:PORT - An IP address and port (address:port) of a service the - client connects to. - --session-id SESSION_ID - A session ID to be passed to the service. If not - specified, the service generates a default session ID. - --grpc-timeout GRPC_TIMEOUT - A timeout in milliseconds used to set gRPC deadline - - how long the client is willing to wait for a reply - from the server (optional). - --list-voices Lists all available voices. - -r RESPONSE_TYPE, --response RESPONSE_TYPE - "streaming" or "single", calls the streaming (default) - or non-streaming version of Synthesize. - -t TEXT, --text TEXT A text to be synthesized. - -i INPUT_FILE, --input-text-file INPUT_FILE - A file with text to be synthesized. - -o OUT_PATH, --out-path OUT_PATH - A path to the output wave file with synthesized audio - content. - -f SAMPLE_RATE, --sample-rate SAMPLE_RATE - A sample rate in Hz of synthesized audio. Set to 0 - (default) to use voice's original sample rate. - --ae ENCODING, --audio-encoding ENCODING - An encoding of the output audio, pcm16 (default) or - ogg-vorbis. - --sp SPEECH_PITCH, --speech-pitch SPEECH_PITCH - Allows adjusting the default pitch of the synthesized - speech (optional, can be overriden by SSML). - --sr SPEECH_RANGE, --speech-range SPEECH_RANGE - Allows adjusting the default range of the synthesized - speech (optional, can be overriden by SSML). - --ss SPEECH_RATE, --speech-rate SPEECH_RATE - Allows adjusting the default rate (speed) of the - synthesized speech (optional, can be overriden by - SSML). - --sv SPEECH_VOLUME, --speech-volume SPEECH_VOLUME - Allows adjusting the default volume of the synthesized - speech (optional, can be overriden by SSML). 
- --vn VOICE_NAME, --voice-name VOICE_NAME - A name od the voice used to synthesize the phrase - (optional, can be overriden by SSML). - --vg VOICE_GENDER, --voice-gender VOICE_GENDER - A gender of the voice - female or male (optional, can - be overriden by SSML). - --va VOICE_AGE, --voice-age VOICE_AGE - An age of the voice - adult, child, or senile - (optional, can be overriden by SSML). - -l LANGUAGE, --language LANGUAGE - ISO 639-1 language code of the phrase to synthesize - (optional, can be overriden by SSML). - --play Play synthesized audio. Works only with pcm16 - (default) encoding. - --tls-dir TLS_DIR If set to a path with SSL/TLS credential files - (client.crt, client.key, ca.crt), use SSL/TLS - authentication. Otherwise use insecure channel - (default). - --list-lexicons Lists all available lexicons. - --get-lexicon LEXICON_NAME - Sends back the content of the lexicon with the - requested name. - --delete-lexicon LEXICON_NAME - Removes the lexicon with the requested name. - --put-lexicon LEXICON_NAME LEXICON_CONTENT - Adds a new lexicon with the requested name or - overwrites the existing one if there is already a - lexicon with such name. Content of the lexicon, shall - comply to https://www.w3.org/TR/pronunciation- - lexicon/. - --put-recording VOICE_NAME RECORDING_KEY AUDIO_PATH - Adds a new recording with the requested key for the - requested voice, or overwrites the existing one if - there is already such a key defined. The recording has - to be PCM16 WAV audio. - --delete-recording VOICE_NAME RECORDING_KEY - Removes the recording with the requested key from the - list of recordings of the requested voice. - --get-recording VOICE_NAME RECORDING_KEY OUTPUT_PATH - Sends back the recording with the requested key for - the requested voice in the linear PCM16 format. - --list-recordings VOICE_NAME - Lists all recording keys for the requested voice. 
+ +Via tox (`-e py312` runs the Python 3.12 environment; substitute the env matching your interpreter): + +```bash +TTS_SERVICE_ADDRESS=host:port uvx --with "tox-uv>=1" tox -e py312 -- -m integration ``` + +In CI, set `TTS_SERVICE_ADDRESS` (and optionally `TTS_VOICE_NAME`, `TTS_LANGUAGE_CODE`) as +repository secrets/variables. The integration test job activates automatically when +`TTS_SERVICE_ADDRESS` is defined. diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100755 index 0000000..21d3283 --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -euo pipefail + +if [ "$#" -eq 0 ] || [ "${1:0:1}" = '-' ]; then + set -- python3 tts_client_python/tts_client.py "$@" +fi + +exec "$@" diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index d4672e9..0000000 --- a/docker/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# TTS Client on Docker - -To use TTS Client docker version, `techmo-tts-client-python:IMAGE_VERSION` docker image has to be loaded locally. - -To build the docker image, run the following command in the project root directory: -``` -docker build -f Dockerfile -t techmo-tts-client-python:3.0.0 . -``` -To send requests to the TTS DNN Service, use `run_tts_client_python.sh` script. - - -## Output file - -The synthesized audio is saved inside the `wav` directory. -The default audio file name can be overwritten with the option: `--out-path "wav/custom_file_name"`, however the first part of the path should not be changed (the path is set inside the docker container's filesystem, and generated files can be obtained in the local filesystem only in specific directories mounted as docker volumes). - - -## Input text files - -Input text files should be placed inside the `txt` directory. The input path should be set as: `"txt/FILE_NAME"`, e.g.: `--input-text-file "txt/input_file.txt"` - -## TLS credencials - -If TLS encryption is used, the credencials files should be placed inside `tls` directory, and an additional option: `--tls-dir "tls"` should be used. 
diff --git a/docker/wav/.gitkeep b/docker/audio/.gitkeep similarity index 100% rename from docker/wav/.gitkeep rename to docker/audio/.gitkeep diff --git a/docker/run.sh b/docker/run.sh new file mode 100755 index 0000000..a39d6ec --- /dev/null +++ b/docker/run.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# coding=utf-8 + +# This script sends request to TTS DNN Service using TTS Client inside the docker container +# Requires "ghcr.io/techmo-pl/tts-client-python:$IMAGE_VERSION" docker image. +# Build it locally first if not available: docker build -t ghcr.io/techmo-pl/tts-client-python:$IMAGE_VERSION . + +set -euo pipefail +IFS=$'\n\t' + +IMAGE_VERSION=3.2.8 + +SCRIPT=$(realpath "$0") +SCRIPTPATH=$(dirname "${SCRIPT}") +docker_image="ghcr.io/techmo-pl/tts-client-python:${IMAGE_VERSION}" + +docker run --rm -it -v "${SCRIPTPATH}/audio:/tts_client/audio" -v "${SCRIPTPATH}/txt:/tts_client/txt" -v "${SCRIPTPATH}/tls:/tts_client/tls" --network host \ + "${docker_image}" \ + --out-path "/tts_client/audio/output.wav" \ + "$@" diff --git a/docker/run_tts_client_python.sh b/docker/run_tts_client_python.sh deleted file mode 100755 index f85fce4..0000000 --- a/docker/run_tts_client_python.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# coding=utf-8 - -# This script sends request to TTS DNN Service using TTS Client inside the docker container -# Requires "techmo-tts-client-python:$IMAGE_VERSION" docker image loaded locally - -set -euo pipefail -IFS=$'\n\t' - -IMAGE_VERSION=3.0.0 - -SCRIPT=$(realpath "$0") -SCRIPTPATH=$(dirname "${SCRIPT}") -docker_image="techmo-tts-client-python:${IMAGE_VERSION}" - -docker run --rm -it -v "${SCRIPTPATH}/wav:/tts_client/wav" -v "${SCRIPTPATH}/txt:/tts_client/txt" -v "${SCRIPTPATH}/tls:/tts_client/tls" --network host \ - "${docker_image}" \ - --out-path "wav/TechmoTTS.wav" \ - "$@" diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..a328258 --- /dev/null +++ b/install.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# +# usage: ./install.sh 
[VENV_PATH] +# +# VENV_PATH: Optional path for the virtual environment (default: ./.venv). +# +# Creates a virtualenv with uv and installs the package with test dependencies. + +set -euo pipefail + +if ! command -v uv &> /dev/null; then + echo "Error: 'uv' is required but not installed." >&2 + echo "Install it with: curl -LsSf https://astral.sh/uv/install.sh | sh" >&2 + echo "After installing, open a new shell or run: source ~/.bashrc (or ~/.zshrc)" >&2 + exit 1 +fi + +PROTO_SENTINEL="submodules/tts-service-api/proto/techmo_tts.proto" +if [ ! -f "${PROTO_SENTINEL}" ]; then + echo "Error: submodule 'tts-service-api' is not initialised." >&2 + echo "Run ./setup.sh first, then re-run ./install.sh." >&2 + exit 1 +fi + +VENV_PATH="${1:-.venv}" + +if [ ! -d "${VENV_PATH}" ]; then + uv venv "${VENV_PATH}" +fi + +# shellcheck disable=SC1091 +source "${VENV_PATH}/bin/activate" +uv pip install -e ".[test]" + +if ! ldconfig -p 2> /dev/null | grep 'libportaudio' > /dev/null; then # not 'grep -q': its early exit can SIGPIPE ldconfig, which pipefail reports as a failed pipeline + echo "" >&2 + echo "Warning: libportaudio2 not found on this system." 
>&2 + echo " Install it with: sudo apt-get install libportaudio2" >&2 + echo " (Required for sounddevice; tests will fail without it.)" >&2 +fi diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..596e2af --- /dev/null +++ b/mypy.ini @@ -0,0 +1,30 @@ +[mypy] +exclude = setup\.py + +# Exclude auto-generated protobuf files from strict type checking +[mypy-tts_client_python.proto.*] +ignore_errors = True + +# grpc stubs are incomplete +[mypy-grpc.*] +ignore_missing_imports = True + +# sounddevice has no stubs +[mypy-sounddevice.*] +ignore_missing_imports = True + +# lxml has no PEP 561 stubs package +[mypy-lxml.*] +ignore_missing_imports = True + +# numpy stubs ship with numpy>=1.20; suppress for environments lacking numpy +[mypy-numpy.*] +ignore_missing_imports = True + +# Relax strict checking for test code +[mypy-tests.*] +disallow_untyped_defs = False +disallow_incomplete_defs = False +disallow_untyped_calls = False +disallow_any_generics = False +disallow_untyped_decorators = False diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b9fc3f3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[build-system] +# grpcio-tools 1.71.0+ ships protobuf 6.x which generates stubs requiring Python>=3.9. +# Keep <1.71.0 so generated stubs stay compatible with Python 3.8 (protobuf 5.29.x). 
+requires = ["setuptools", "grpcio-tools>=1.70.0,<1.71.0"] +build-backend = "setuptools.build_meta" diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..74cad0c --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +markers = + integration: marks tests requiring a live TTS service (deselect with '-m "not integration"') +addopts = -m "not integration" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index df85a3a..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -grpcio==1.38.1 -grpcio-tools==1.38.1 -lxml==4.7.1 -numpy==1.19.5 -protobuf==3.15.8 -sounddevice==0.4.1 diff --git a/setup.py b/setup.py index 0db5d0c..f6ffa8f 100644 --- a/setup.py +++ b/setup.py @@ -1,124 +1,134 @@ +from __future__ import annotations + from pathlib import Path +from typing import Any -from setuptools import Command -from setuptools import setup, find_packages +from setuptools import Command, find_packages, setup from setuptools.command.build_py import build_py from setuptools.command.develop import develop -from setuptools.command.install import install from setuptools.command.egg_info import egg_info -import pkg_resources -from tts_client_python.VERSION import TTS_CLIENT_PYTHON_VERSION as package_version +from setuptools.command.install import install + +# Read version without importing the package — avoids a circular import during +# the PEP-517 build phase (setup.py is executed before the package is installed). 
+_version_ns: dict[str, object] = {} +exec( # noqa: S102 + (Path(__file__).parent / "tts_client_python" / "VERSION.py").read_text(), + _version_ns, +) +package_version: str = str(_version_ns["TTS_CLIENT_PYTHON_VERSION"]) project_root = Path(__file__).parent -class BuildPackageProtos(Command): +class BuildPackageProtos(Command): # type: ignore[misc] """Command to generate project *_pb2.py modules from proto files.""" - user_options = [] + user_options: list[Any] = [] - def initialize_options(self): + def initialize_options(self) -> None: pass - def finalize_options(self): + def finalize_options(self) -> None: pass - def run(self): + def run(self) -> None: """Build gRPC modules.""" - from grpc_tools import protoc import shutil + import grpc_tools + from grpc_tools import protoc + + temp_proto_dir = project_root / "tts_service_api" try: - proto_file = ( - project_root - / "submodules" - / "tts-service-api" - / "proto" - / "techmo_tts.proto" - ) + proto_file = project_root / "submodules" / "tts-service-api" / "proto" / "techmo_tts.proto" output_path = project_root / "tts_client_python" / "proto" - well_known_protos_include = pkg_resources.resource_filename( - "grpc_tools", "_proto" - ) - temp_proto_dir = project_root / "tts_service_api" - Path.mkdir(temp_proto_dir) + if not proto_file.exists(): + raise FileNotFoundError( + f"Proto source file not found: {proto_file}\n" + "The 'tts-service-api' submodule is not initialised.\n" + "Run ./setup.sh first, then re-run ./install.sh." 
+ ) + if grpc_tools.__file__ is None: + raise RuntimeError("Cannot locate grpc_tools package directory") + well_known_protos_include = str(Path(grpc_tools.__file__).parent / "_proto") + shutil.rmtree(temp_proto_dir, ignore_errors=True) + temp_proto_dir.mkdir() shutil.copy(proto_file, temp_proto_dir) - command = [ + command_1 = [ "grpc_tools.protoc", f"--proto_path={output_path.relative_to(project_root)}={temp_proto_dir.name}", - "--proto_path={}".format(well_known_protos_include), + f"--proto_path={well_known_protos_include}", f"--python_out={project_root.relative_to(project_root)}", f"--grpc_python_out={project_root.relative_to(project_root)}", ] + [str(temp_proto_dir.relative_to(project_root) / proto_file.name)] - if protoc.main(command) != 0: + if protoc.main(command_1) != 0: raise Exception("Problem with building gRPC modules") except Exception as e: print(e) + raise finally: shutil.rmtree(temp_proto_dir, ignore_errors=True) -class BuildPyGRPC(build_py): +class BuildPyGRPC(build_py): # type: ignore[misc] """Command for Python modules build.""" - def __init__(self, dist): + def __init__(self, dist: Any) -> None: """Create a sub-command to execute.""" self.subcommand = BuildPackageProtos(dist) super().__init__(dist) - def run(self): + def run(self) -> None: """Build Python and GRPC modules.""" super().run() self.subcommand.run() -class DevelopGRPC(develop): +class DevelopGRPC(develop): # type: ignore[misc] """Command for develop installation.""" - def __init__(self, dist): + def __init__(self, dist: Any) -> None: """Create a sub-command to execute.""" self.subcommand = BuildPackageProtos(dist) super().__init__(dist) - def run(self): + def run(self) -> None: """Build GRPC modules before the default installation.""" self.subcommand.run() super().run() -class CustomInstall(install): +class CustomInstall(install): # type: ignore[misc] """Command for pip installation.""" - def __init__(self, dist): + def __init__(self, dist: Any) -> None: """Create a sub-command to 
execute.""" self.subcommand = BuildPackageProtos(dist) super().__init__(dist) - def run(self): + def run(self) -> None: """Build GRPC modules before the default installation.""" self.subcommand.run() super().run() - self.subcommand.run() -class CustomEggInfo(egg_info): +class CustomEggInfo(egg_info): # type: ignore[misc] """Command for pip installation.""" - def __init__(self, dist): + def __init__(self, dist: Any) -> None: """Create a sub-command to execute.""" self.subcommand = BuildPackageProtos(dist) super().__init__(dist) - def run(self): + def run(self) -> None: """Build GRPC modules before the default installation.""" self.subcommand.run() super().run() -with open("README.md") as f: - long_description = f.read() setup( name="tts_client_python", version=package_version, @@ -130,20 +140,32 @@ def run(self): packages=find_packages(), include_package_data=True, install_requires=[ - "grpcio>=1.38.1, <2.0.0", - "protobuf>=3.15.8, <5.0.0", - "lxml>=4.6.4, <5.0.0", - "numpy>=1.19.5, <2.0.0", - "sounddevice>=0.4.0, <0.5.0", - ], - setup_requires=[ - "grpcio-tools>=1.38.1, <2.0.0", - "pip>=21.3.1, <23.0.0", + # Generated stubs embed GRPC_GENERATED_VERSION='1.70.0' and raise + # RuntimeError for grpcio<1.70.0. grpcio 1.71.0 dropped Python 3.8. + # Python 3.9+ uses a recent known-good version (1.70.0) as lower bound. + "grpcio>=1.70.0,<2.0.0; python_version>='3.9'", + "grpcio>=1.70.0,<1.71.0; python_version=='3.8'", + # Stubs are generated with protobuf 5.29.x; runtime must be >=5.29.0. + # protobuf 6.x requires Python>=3.9, so cap at <6.0 for Python 3.8. 
+ "protobuf>=5.29.0,<6.0.0; python_version=='3.8'", + "protobuf>=5.29.0; python_version>='3.9'", + "lxml>=4.6.4", + "numpy>=1.19.5", + "sounddevice>=0.4.0", ], - python_requires=">=3.6", - entry_points={ - "console_scripts": ["tts_client = tts_client_python.tts_client:main"] + extras_require={ + "test": [ + "pytest>=7.0", + "pytest-cov>=4.0", + "jiwer>=3.0", + ], + }, + python_requires=">=3.8", + project_urls={ + "Source": "https://github.com/techmo-pl/tts-client-python", + "Documentation": "https://github.com/techmo-pl/tts-service-api/blob/master/doc/Documentation.md", }, + entry_points={"console_scripts": ["tts_client = tts_client_python.tts_client:main"]}, cmdclass={ "build_py": BuildPyGRPC, "build_grpc": BuildPackageProtos, diff --git a/setup.sh b/setup.sh index ec68c37..3454459 100755 --- a/setup.sh +++ b/setup.sh @@ -1,9 +1,15 @@ #!/bin/bash +# +# usage: ./setup.sh +# +# Run once after cloning: initialises submodules and installs pre-commit hooks. set -euo pipefail -venv_dir=.venv +git submodule sync --recursive +git submodule update --init --recursive -python3 -m venv "${venv_dir}" -source "${venv_dir}"/bin/activate -pip install -e . +if [ ! -d pre-commit ]; then + git clone --depth 1 --branch v3.0.0 https://github.com/techmo-pl/pre-commit.git +fi +./pre-commit/install.sh diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..1c1585f --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import os +import subprocess +import sys +from unittest.mock import MagicMock + +import pytest + + +def _sounddevice_probe() -> str: + """Probe whether sounddevice can be imported in a subprocess. + + Returns: + "ok" — importable without issues + "crash" — killed by a signal (e.g. 
SIGSEGV from PortAudio in headless env) + "error" — import failed with a Python exception (missing package, broken dep) + """ + result = subprocess.run( + [sys.executable, "-c", "import sounddevice"], + capture_output=True, + timeout=10, + ) + if result.returncode == 0: + return "ok" + # Negative return code on Linux means the process was killed by a signal + # (e.g. -11 = SIGSEGV from PortAudio Pa_Initialize() in headless environments). + if result.returncode < 0: + return "crash" + # Any other non-zero exit: a Python exception (ModuleNotFoundError, ImportError, + # OSError for missing libportaudio, etc.). Let these fail naturally so dependency + # problems are visible. + return "error" + + +# sounddevice calls Pa_Initialize() at module level, which can segfault in headless +# environments (no audio hardware). No tests exercise real audio playback, so +# replacing it with a MagicMock is safe. +# We only mock on signal-kill (segfault); Python-level errors are left to fail +# naturally so that dependency problems remain visible. +_probe = _sounddevice_probe() +if _probe == "crash": + sys.modules["sounddevice"] = MagicMock() +elif _probe == "error": + import ctypes.util + + if ctypes.util.find_library("portaudio") is None: + print( + "\nWarning: libportaudio2 is not installed — sounddevice will fail on import.\nFix: sudo apt-get install libportaudio2\n", + file=sys.stderr, + ) + + +@pytest.fixture(scope="session") +def tts_service_address() -> str: + """Return TTS_SERVICE_ADDRESS from the environment. + + Tests using this fixture are automatically skipped when the variable is not set. + Set TTS_VOICE_NAME and TTS_LANGUAGE_CODE to target a specific voice (optional). 
+ """ + addr = os.environ.get("TTS_SERVICE_ADDRESS", "") + if not addr: + pytest.skip("TTS_SERVICE_ADDRESS not set — skipping TTS integration tests") + return addr diff --git a/tests/test_channel.py b/tests/test_channel.py new file mode 100644 index 0000000..3aefd66 --- /dev/null +++ b/tests/test_channel.py @@ -0,0 +1,120 @@ +"""Unit tests for create_channel() branching logic in general.py.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from tts_client_python.general import create_channel + + +class TestCreateChannelInsecure: + def test_no_tls_params_creates_insecure(self): + with patch("tts_client_python.general.grpc.insecure_channel") as mock_insecure: + mock_insecure.return_value = MagicMock() + create_channel( + service_address="dragon:45936", + tls=False, + tls_dir="", + tls_ca_cert_file="", + tls_cert_file="", + tls_private_key_file="", + ) + mock_insecure.assert_called_once_with("dragon:45936") + + def test_no_tls_params_does_not_create_secure(self): + with patch("tts_client_python.general.grpc.insecure_channel", return_value=MagicMock()): + with patch("tts_client_python.general.grpc.secure_channel") as mock_secure: + create_channel( + service_address="localhost:50051", + tls=False, + tls_dir="", + tls_ca_cert_file="", + tls_cert_file="", + tls_private_key_file="", + ) + mock_secure.assert_not_called() + + +class TestCreateChannelTlsFlag: + def test_tls_flag_true_creates_secure_channel(self): + with patch("tts_client_python.general.grpc.secure_channel") as mock_secure: + with patch("tts_client_python.general.grpc.ssl_channel_credentials") as mock_creds: + mock_creds.return_value = MagicMock() + mock_secure.return_value = MagicMock() + create_channel( + service_address="dragon:45936", + tls=True, + tls_dir="", + tls_ca_cert_file="", + tls_cert_file="", + tls_private_key_file="", + ) + mock_secure.assert_called_once() + mock_creds.assert_called_once_with(None, None, None) + + +class TestCreateChannelTlsDir: + def 
test_tls_dir_reads_cert_files(self, tmp_path): + ca = tmp_path / "ca.crt" + key = tmp_path / "client.key" + crt = tmp_path / "client.crt" + ca.write_bytes(b"ca-data") + key.write_bytes(b"key-data") + crt.write_bytes(b"crt-data") + + with patch("tts_client_python.general.grpc.secure_channel") as mock_secure: + with patch("tts_client_python.general.grpc.ssl_channel_credentials") as mock_creds: + mock_creds.return_value = MagicMock() + mock_secure.return_value = MagicMock() + create_channel( + service_address="dragon:45936", + tls=False, + tls_dir=str(tmp_path), + tls_ca_cert_file="", + tls_cert_file="", + tls_private_key_file="", + ) + mock_creds.assert_called_once_with(b"ca-data", b"key-data", b"crt-data") + mock_secure.assert_called_once() + + +class TestCreateChannelIndividualFiles: + def test_individual_cert_files_override(self, tmp_path): + ca_file = tmp_path / "ca.crt" + ca_file.write_bytes(b"ca-content") + + with patch("tts_client_python.general.grpc.secure_channel") as mock_secure: + with patch("tts_client_python.general.grpc.ssl_channel_credentials") as mock_creds: + mock_creds.return_value = MagicMock() + mock_secure.return_value = MagicMock() + create_channel( + service_address="dragon:45936", + tls=False, + tls_dir="", + tls_ca_cert_file=str(ca_file), + tls_cert_file="", + tls_private_key_file="", + ) + mock_creds.assert_called_once_with(b"ca-content", None, None) + + def test_mutual_tls_with_individual_files(self, tmp_path): + ca_file = tmp_path / "ca.crt" + cert_file = tmp_path / "client.crt" + key_file = tmp_path / "client.key" + ca_file.write_bytes(b"ca") + cert_file.write_bytes(b"cert") + key_file.write_bytes(b"key") + + with patch("tts_client_python.general.grpc.secure_channel") as mock_secure: + with patch("tts_client_python.general.grpc.ssl_channel_credentials") as mock_creds: + mock_creds.return_value = MagicMock() + mock_secure.return_value = MagicMock() + create_channel( + service_address="dragon:45936", + tls=False, + tls_dir="", + 
tls_ca_cert_file=str(ca_file), + tls_cert_file=str(cert_file), + tls_private_key_file=str(key_file), + ) + mock_creds.assert_called_once_with(b"ca", b"key", b"cert") diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..57a1de0 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,149 @@ +"""Unit tests for CLI argument validation functions in tts_client.py.""" + +from __future__ import annotations + +import argparse + +import pytest + +from tts_client_python.tts_client import ( + Once, + check_voice_parameters, + positive_int, + unsigned_int, + valid_service_address, + valid_session_id, + valid_tls_dir, +) + + +class TestValidServiceAddress: + def test_valid_hostname_and_port(self): + assert valid_service_address("dragon:45936") == "dragon:45936" + + def test_valid_ip_and_port(self): + assert valid_service_address("192.168.1.1:8080") == "192.168.1.1:8080" + + def test_valid_localhost(self): + assert valid_service_address("localhost:50051") == "localhost:50051" + + def test_missing_port_raises(self): + with pytest.raises(argparse.ArgumentTypeError, match="Invalid service address"): + valid_service_address("dragon") + + def test_missing_host_raises(self): + with pytest.raises(argparse.ArgumentTypeError): + valid_service_address(":45936") + + def test_invalid_characters_raises(self): + with pytest.raises(argparse.ArgumentTypeError): + valid_service_address("dragon:port_abc") + + def test_port_too_long_raises(self): + with pytest.raises(argparse.ArgumentTypeError): + valid_service_address("dragon:123456") + + +class TestValidSessionId: + def test_valid_alphanumeric(self): + assert valid_session_id("session123") == "session123" + + def test_valid_with_dash_and_underscore(self): + assert valid_session_id("my-session_01") == "my-session_01" + + def test_none_returns_empty_string(self): + assert valid_session_id(None) == "" + + def test_special_characters_raise(self): + with pytest.raises(argparse.ArgumentTypeError, match="Invalid session ID"): 
+ valid_session_id("invalid session!") + + def test_too_long_raises(self): + long_id = "a" * 64 + with pytest.raises(argparse.ArgumentTypeError, match="shorter than 64"): + valid_session_id(long_id) + + def test_exactly_63_chars_is_valid(self): + ok_id = "a" * 63 + assert valid_session_id(ok_id) == ok_id + + +class TestValidTlsDir: + def test_valid_dir_with_all_files(self, tmp_path): + for fname in ["client.crt", "client.key", "ca.crt"]: + (tmp_path / fname).write_bytes(b"dummy") + assert valid_tls_dir(str(tmp_path)) == str(tmp_path) + + def test_missing_file_raises(self, tmp_path): + (tmp_path / "client.crt").write_bytes(b"dummy") + (tmp_path / "client.key").write_bytes(b"dummy") + # ca.crt missing + with pytest.raises(argparse.ArgumentTypeError, match="missing files"): + valid_tls_dir(str(tmp_path)) + + def test_nonexistent_dir_raises(self): + with pytest.raises(argparse.ArgumentTypeError, match="Invalid directory"): + valid_tls_dir("/nonexistent/path/xyz") + + +class TestPositiveInt: + def test_positive_value(self): + assert positive_int("5") == 5 + + def test_zero_raises(self): + with pytest.raises(argparse.ArgumentTypeError, match="greater than 0"): + positive_int("0") + + def test_negative_raises(self): + with pytest.raises(argparse.ArgumentTypeError): + positive_int("-1") + + def test_non_integer_raises(self): + with pytest.raises(argparse.ArgumentTypeError, match="invalid int"): + positive_int("abc") + + +class TestUnsignedInt: + def test_positive_value(self): + assert unsigned_int("10") == 10 + + def test_zero_is_valid(self): + assert unsigned_int("0") == 0 + + def test_negative_raises(self): + with pytest.raises(argparse.ArgumentTypeError, match="greater than or equal to 0"): + unsigned_int("-1") + + def test_non_integer_raises(self): + with pytest.raises(argparse.ArgumentTypeError, match="invalid int"): + unsigned_int("3.14") + + +class TestCheckVoiceParameters: + def test_valid_parameters_no_exit(self): + check_voice_parameters("Agnieszka-1", "pl-PL") + 
+ def test_empty_voice_name_exits(self): + with pytest.raises(SystemExit): + check_voice_parameters("", "pl-PL") + + def test_empty_language_code_exits(self): + with pytest.raises(SystemExit): + check_voice_parameters("Agnieszka-1", "") + + +class TestOnceAction: + def _make_parser_with_once(self): + parser = argparse.ArgumentParser() + parser.add_argument("--foo", action=Once, default=None) + return parser + + def test_single_use_succeeds(self): + parser = self._make_parser_with_once() + args = parser.parse_args(["--foo", "bar"]) + assert args.foo == "bar" + + def test_duplicate_use_raises_system_exit(self): + parser = self._make_parser_with_once() + with pytest.raises(SystemExit): + parser.parse_args(["--foo", "a", "--foo", "b"]) diff --git a/tests/test_helpers.py b/tests/test_helpers.py new file mode 100644 index 0000000..b0a8534 --- /dev/null +++ b/tests/test_helpers.py @@ -0,0 +1,178 @@ +"""Unit tests for helper functions in general.py (encoding, path, voice, config).""" + +from __future__ import annotations + +import pytest + +from tts_client_python.general import create_out_path, create_voice, get_audio_encoding, prepare_synthesis_config +from tts_client_python.proto import techmo_tts_pb2 + + +class TestGetAudioEncoding: + def test_pcm16(self): + assert get_audio_encoding("pcm16") == techmo_tts_pb2.AudioEncoding.PCM16 # type: ignore[attr-defined] + + def test_ogg_vorbis(self): + assert get_audio_encoding("ogg-vorbis") == techmo_tts_pb2.AudioEncoding.OGG_VORBIS # type: ignore[attr-defined] + + def test_ogg_opus(self): + assert get_audio_encoding("ogg-opus") == techmo_tts_pb2.AudioEncoding.OGG_OPUS # type: ignore[attr-defined] + + def test_a_law(self): + assert get_audio_encoding("a-law") == techmo_tts_pb2.AudioEncoding.A_LAW # type: ignore[attr-defined] + + def test_mu_law(self): + assert get_audio_encoding("mu-law") == techmo_tts_pb2.AudioEncoding.MU_LAW # type: ignore[attr-defined] + + def test_unknown_encoding_raises(self): + with 
pytest.raises(RuntimeError, match="Unsupported audio-encoding"): + get_audio_encoding("mp3") + + +class TestCreateOutPath: + def test_explicit_path_returned_unchanged(self): + enc = techmo_tts_pb2.AudioEncoding.PCM16 # type: ignore[attr-defined] + assert create_out_path("/tmp/my.wav", enc) == "/tmp/my.wav" # noqa: S108 + + def test_empty_path_pcm16_gives_wav(self): + enc = techmo_tts_pb2.AudioEncoding.PCM16 # type: ignore[attr-defined] + assert create_out_path("", enc) == "output.wav" + + def test_empty_path_a_law_gives_wav(self): + enc = techmo_tts_pb2.AudioEncoding.A_LAW # type: ignore[attr-defined] + assert create_out_path("", enc) == "output.wav" + + def test_empty_path_mu_law_gives_wav(self): + enc = techmo_tts_pb2.AudioEncoding.MU_LAW # type: ignore[attr-defined] + assert create_out_path("", enc) == "output.wav" + + def test_empty_path_ogg_vorbis_gives_ogg(self): + enc = techmo_tts_pb2.AudioEncoding.OGG_VORBIS # type: ignore[attr-defined] + assert create_out_path("", enc) == "output.ogg" + + def test_empty_path_ogg_opus_gives_ogg(self): + enc = techmo_tts_pb2.AudioEncoding.OGG_OPUS # type: ignore[attr-defined] + assert create_out_path("", enc) == "output.ogg" + + def test_unsupported_encoding_raises(self): + with pytest.raises(RuntimeError, match="Unsupported audio encoding"): + create_out_path("", 999) + + +class TestCreateVoice: + def test_name_only(self): + voice = create_voice(voice_name="Agnieszka-1", voice_gender="", voice_age="", voice_variant=1) + assert voice.name == "Agnieszka-1" + + def test_female_gender(self): + voice = create_voice(voice_name="", voice_gender="female", voice_age="", voice_variant=1) + assert voice.gender == techmo_tts_pb2.Gender.FEMALE # type: ignore[attr-defined] + + def test_male_gender(self): + voice = create_voice(voice_name="", voice_gender="male", voice_age="", voice_variant=1) + assert voice.gender == techmo_tts_pb2.Gender.MALE # type: ignore[attr-defined] + + def test_adult_age(self): + voice = 
create_voice(voice_name="", voice_gender="", voice_age="adult", voice_variant=1) + assert voice.age == techmo_tts_pb2.Age.ADULT # type: ignore[attr-defined] + + def test_child_age(self): + voice = create_voice(voice_name="", voice_gender="", voice_age="child", voice_variant=1) + assert voice.age == techmo_tts_pb2.Age.CHILD # type: ignore[attr-defined] + + def test_senile_age(self): + voice = create_voice(voice_name="", voice_gender="", voice_age="senile", voice_variant=1) + assert voice.age == techmo_tts_pb2.Age.SENILE # type: ignore[attr-defined] + + def test_variant(self): + voice = create_voice(voice_name="", voice_gender="", voice_age="", voice_variant=3) + assert voice.variant == 3 + + def test_unsupported_gender_raises(self): + with pytest.raises(RuntimeError, match="Unsupported voice-gender"): + create_voice(voice_name="", voice_gender="nonbinary", voice_age="", voice_variant=1) + + def test_unsupported_age_raises(self): + with pytest.raises(RuntimeError, match="Unsupported voice-age"): + create_voice(voice_name="", voice_gender="", voice_age="toddler", voice_variant=1) + + +class TestPrepareSynthesisConfig: + def test_all_defaults_returns_none(self): + result = prepare_synthesis_config( + language_code="", + voice_name="", + voice_age="", + voice_gender="", + voice_variant=1, + speech_pitch=1.0, + speech_range=1.0, + speech_rate=1.0, + speech_stress=1.0, + speech_volume=1.0, + ) + assert result is None + + def test_language_code_returns_config(self): + result = prepare_synthesis_config( + language_code="pl-PL", + voice_name="", + voice_age="", + voice_gender="", + voice_variant=1, + speech_pitch=1.0, + speech_range=1.0, + speech_rate=1.0, + speech_stress=1.0, + speech_volume=1.0, + ) + assert result is not None + assert result.language_code == "pl-PL" + + def test_voice_name_triggers_voice_in_config(self): + result = prepare_synthesis_config( + language_code="", + voice_name="Agnieszka-1", + voice_age="", + voice_gender="", + voice_variant=1, + 
speech_pitch=1.0, + speech_range=1.0, + speech_rate=1.0, + speech_stress=1.0, + speech_volume=1.0, + ) + assert result is not None + assert result.voice.name == "Agnieszka-1" + + def test_non_default_speech_rate_sets_prosodic(self): + result = prepare_synthesis_config( + language_code="", + voice_name="", + voice_age="", + voice_gender="", + voice_variant=1, + speech_pitch=1.0, + speech_range=1.0, + speech_rate=1.5, + speech_stress=1.0, + speech_volume=1.0, + ) + assert result is not None + assert abs(result.prosodic_properties.rate - 1.5) < 1e-6 + + def test_non_default_variant_triggers_voice(self): + result = prepare_synthesis_config( + language_code="", + voice_name="", + voice_age="", + voice_gender="", + voice_variant=2, + speech_pitch=1.0, + speech_range=1.0, + speech_rate=1.0, + speech_stress=1.0, + speech_volume=1.0, + ) + assert result is not None + assert result.voice.variant == 2 diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 0000000..a7876bf --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,57 @@ +"""Integration tests — require a live TTS service. + +These tests are excluded from the default pytest run (see pytest.ini addopts). 
+Run them explicitly: + + TTS_SERVICE_ADDRESS=host:port pytest -m integration + +Optional environment variables: + TTS_VOICE_NAME — voice name passed to --voice-name (default: service default) + TTS_LANGUAGE_CODE — ISO 639-1 language code (default: service default) +""" + +from __future__ import annotations + +import os + +import pytest + +pytestmark = pytest.mark.integration + + +def test_synthesize_generates_audio_file(tts_service_address, tmp_path): + """Verify the TTS service produces a non-empty WAV audio file.""" + from tts_client_python.general import synthesize + + output = tmp_path / "output.wav" + + synthesize( + service_address=tts_service_address, + tls=False, + tls_dir="", + tls_ca_cert_file="", + tls_cert_file="", + tls_private_key_file="", + session_id="", + grpc_timeout=10_000, + audio_encoding="pcm16", + sampling_rate=0, + max_frame_size=0, + language_code=os.environ.get("TTS_LANGUAGE_CODE", ""), + voice_name=os.environ.get("TTS_VOICE_NAME", ""), + voice_age="", + voice_gender="", + voice_variant=1, + speech_pitch=1.0, + speech_range=1.0, + speech_rate=1.0, + speech_stress=1.0, + speech_volume=1.0, + play=False, + response="streaming", + out_path=str(output), + text="Test audio synthesis.", + ) + + assert output.exists(), f"Audio file was not created — check TTS_SERVICE_ADDRESS={tts_service_address}" + assert output.stat().st_size > 0, "Audio file was created but is empty" diff --git a/tests/test_wave_utils.py b/tests/test_wave_utils.py new file mode 100644 index 0000000..a4088e9 --- /dev/null +++ b/tests/test_wave_utils.py @@ -0,0 +1,158 @@ +"""Unit tests for write_wave_file() and AudioSaver in wave_utils.py / general.py.""" + +from __future__ import annotations + +import struct + +import pytest + +from tts_client_python.general import AudioSaver +from tts_client_python.proto import techmo_tts_pb2 +from tts_client_python.wave_utils import AudioFormat, write_wave_file + + +class TestWriteWaveFile: + def _parse_header(self, data: bytes) -> dict: + 
fmt = "<4sL4s4sLHHLLHH4sL" + header_size = struct.calcsize(fmt) + ( + riff, + riff_size, + wave, + fmt_id, + fmt_chunk_size, + audio_fmt, + num_channels, + sample_rate, + byte_rate, + block_align, + bits_per_sample, + data_id, + data_size, + ) = struct.unpack(fmt, data[:header_size]) + return { + "riff": riff, + "riff_size": riff_size, + "wave": wave, + "fmt_id": fmt_id, + "fmt_chunk_size": fmt_chunk_size, + "audio_fmt": audio_fmt, + "num_channels": num_channels, + "sample_rate": sample_rate, + "byte_rate": byte_rate, + "block_align": block_align, + "bits_per_sample": bits_per_sample, + "data_id": data_id, + "data_size": data_size, + } + + def test_riff_header_markers(self, tmp_path): + out = tmp_path / "out.wav" + audio_data = bytearray(b"\x00\x01" * 100) + write_wave_file(str(out), audio_data, 16000, 1, 2, int(AudioFormat.PCM16)) + raw = out.read_bytes() + hdr = self._parse_header(raw) + assert hdr["riff"] == b"RIFF" + assert hdr["wave"] == b"WAVE" + assert hdr["fmt_id"] == b"fmt " + assert hdr["data_id"] == b"data" + + def test_pcm16_audio_format_field(self, tmp_path): + out = tmp_path / "out.wav" + write_wave_file(str(out), bytearray(b"\x00" * 4), 8000, 1, 2, int(AudioFormat.PCM16)) + hdr = self._parse_header(out.read_bytes()) + assert hdr["audio_fmt"] == 1 # PCM16 + + def test_a_law_audio_format_field(self, tmp_path): + out = tmp_path / "out.wav" + write_wave_file(str(out), bytearray(b"\x00" * 4), 8000, 1, 1, int(AudioFormat.A_LAW)) + hdr = self._parse_header(out.read_bytes()) + assert hdr["audio_fmt"] == 6 + + def test_mu_law_audio_format_field(self, tmp_path): + out = tmp_path / "out.wav" + write_wave_file(str(out), bytearray(b"\x00" * 4), 8000, 1, 1, int(AudioFormat.MU_LAW)) + hdr = self._parse_header(out.read_bytes()) + assert hdr["audio_fmt"] == 7 + + def test_data_appended_after_header(self, tmp_path): + out = tmp_path / "out.wav" + audio_data = bytearray(b"\xab\xcd" * 50) + write_wave_file(str(out), audio_data, 16000, 1, 2, int(AudioFormat.PCM16)) + raw 
= out.read_bytes() + header_size = struct.calcsize("<4sL4s4sLHHLLHH4sL") + assert raw[header_size:] == bytes(audio_data) + + def test_riff_size_is_36_plus_data(self, tmp_path): + out = tmp_path / "out.wav" + audio_data = bytearray(b"\x00" * 200) + write_wave_file(str(out), audio_data, 16000, 1, 2, int(AudioFormat.PCM16)) + hdr = self._parse_header(out.read_bytes()) + assert hdr["riff_size"] == 36 + len(audio_data) + + def test_sample_rate_in_header(self, tmp_path): + out = tmp_path / "out.wav" + write_wave_file(str(out), bytearray(b"\x00" * 4), 44100, 1, 2, int(AudioFormat.PCM16)) + hdr = self._parse_header(out.read_bytes()) + assert hdr["sample_rate"] == 44100 + + +class TestAudioSaver: + def test_append_accumulates_data(self): + saver = AudioSaver() + saver.append(b"\x01\x02") + saver.append(b"\x03\x04") + assert saver._buffer == bytearray(b"\x01\x02\x03\x04") + + def test_clear_empties_buffer(self): + saver = AudioSaver() + saver.append(b"\x01\x02") + saver.clear() + assert len(saver._buffer) == 0 + + def test_save_pcm16_writes_wav(self, tmp_path): + saver = AudioSaver(sampling_frequency=16000) + saver.setEncoding(techmo_tts_pb2.AudioEncoding.PCM16) # type: ignore[attr-defined] + saver.append(b"\x00\x01" * 100) + out = tmp_path / "speech.wav" + saver.save(str(out)) + assert out.exists() + content = out.read_bytes() + assert content[:4] == b"RIFF" + + def test_save_ogg_writes_raw(self, tmp_path): + saver = AudioSaver() + saver.setEncoding(techmo_tts_pb2.AudioEncoding.OGG_VORBIS) # type: ignore[attr-defined] + raw = b"OggS\x00fake-ogg-data" + saver.append(raw) + out = tmp_path / "speech.ogg" + saver.save(str(out)) + assert out.read_bytes() == raw + + def test_save_without_framerate_raises(self, tmp_path): + saver = AudioSaver() + saver.setEncoding(techmo_tts_pb2.AudioEncoding.PCM16) # type: ignore[attr-defined] + saver.append(b"\x00\x01") + with pytest.raises(RuntimeError, match="Sample rate has not been set"): + saver.save(str(tmp_path / "out.wav")) + + def 
test_set_frame_rate_and_samp_width(self): + saver = AudioSaver() + saver.setFrameRate(8000) + saver.setSampWidth(1) + assert saver._framerate == 8000 + assert saver._sampwidth == 1 + + def test_is_equal_to(self): + saver1 = AudioSaver() + saver2 = AudioSaver() + saver1.append(b"\xaa\xbb") + saver2.append(b"\xaa\xbb") + assert saver1.isEqualTo(saver2) + + def test_is_not_equal_to(self): + saver1 = AudioSaver() + saver2 = AudioSaver() + saver1.append(b"\xaa") + saver2.append(b"\xbb") + assert not saver1.isEqualTo(saver2) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..88f3198 --- /dev/null +++ b/tox.ini @@ -0,0 +1,40 @@ +[tox] +# Python 3.7 is not downloadable via uv; minimum testable version is 3.8. +# Python 3.14 is included as a release candidate to catch forward-compat issues. +envlist = py38, py39, py310, py311, py312, py313, py314 +requires = + tox-uv>=1 + +[testenv] +# skip_install=true keeps each env lightweight: the package is found via PYTHONPATH +# rather than doing a full editable install in each env. +# Proto stubs (tts_client_python/proto/*_pb2.py) are NOT committed — they are +# gitignored and must be generated before tests run. In CI the "Generate proto +# stubs" step does this; locally, run `./install.sh` first (triggers proto +# generation via the setup.py cmdclass hooks; requires the tts-service-api +# submodule — see README § Running tests). +skip_install = true +set_env = PYTHONPATH = {toxinidir} +# Pass service-address variables so integration tests can connect to a live service +# when run via tox (e.g. tox -e py312 -- -m integration). +passenv = + TTS_* +deps = + # grpcio 1.71.0 dropped Python 3.8; stubs require grpcio>=1.70.0. 
+ grpcio>=1.70.0,<2.0.0; python_version>="3.9" + grpcio>=1.70.0,<1.71.0; python_version=="3.8" + protobuf>=5.29.0,<6.0.0; python_version=="3.8" + protobuf>=5.29.0; python_version>="3.9" + lxml>=4.6.4 + numpy>=1.19.5 + sounddevice>=0.4.0 + pytest>=7.0 + pytest-cov>=4.0 + jiwer>=3.0 +commands_pre = + # Abort early with a clear message if proto stubs are missing rather than + # letting pytest fail with a cryptic ModuleNotFoundError deep in imports. + # PYTHONPATH already contains {toxinidir} so we use it to locate the stub. + python -c "import os, sys; stub = os.path.join(os.environ['PYTHONPATH'], 'tts_client_python', 'proto', 'techmo_tts_pb2.py'); sys.exit(0) if os.path.exists(stub) else sys.exit('Proto stubs missing. Run: ./install.sh (or: python setup.py build_grpc)')" +commands = + pytest --basetemp={envtmpdir} --cov=tts_client_python --cov-report=term-missing --cov-report=xml:{envtmpdir}/coverage.xml {posargs} diff --git a/tts_client_python/VERSION.py b/tts_client_python/VERSION.py index f2bb12e..cfee65c 100644 --- a/tts_client_python/VERSION.py +++ b/tts_client_python/VERSION.py @@ -1 +1 @@ -TTS_CLIENT_PYTHON_VERSION = "3.0.0" +TTS_CLIENT_PYTHON_VERSION = "3.2.8" diff --git a/tts_client_python/general.py b/tts_client_python/general.py index b981eee..0d4bcf9 100644 --- a/tts_client_python/general.py +++ b/tts_client_python/general.py @@ -1,28 +1,37 @@ -from tts_client_python.proto import techmo_tts_pb2 as techmo_tts_pb2 -from tts_client_python.proto import techmo_tts_pb2_grpc as techmo_tts_pb2_grpc -from io import BytesIO -import grpc -import os +from __future__ import annotations + import wave -import struct -import sys +from pathlib import Path +from typing import Any + +import grpc import numpy as np import sounddevice as sd +from tts_client_python.proto import techmo_tts_pb2 as techmo_tts_pb2 +from tts_client_python.proto import techmo_tts_pb2_grpc as techmo_tts_pb2_grpc +from tts_client_python.wave_utils import AudioFormat, write_wave_file + class 
AudioPlayer: - def __init__(self, sampling_rate_hz=None, encoding="pcm16"): + def __init__( + self, + sampling_rate_hz: int | None = None, + encoding: Any = None, + ) -> None: + if encoding is None: + encoding = techmo_tts_pb2.AudioEncoding.PCM16 # type: ignore[attr-defined] self.sampling_rate_hz = sampling_rate_hz - self.stream = None + self.stream: sd.OutputStream | None = None - if encoding == "pcm16": + if encoding == techmo_tts_pb2.AudioEncoding.PCM16: # type: ignore[attr-defined] self.encoding = np.int16 - elif encoding == "ogg-vorbis": - raise RuntimeError("OGG-Vorbis audio-encoding is not implemented.") + elif encoding == "ogg-vorbis" or encoding == "ogg-opus": + raise RuntimeError("Audio Player supports only PCM16 audio-encoding.") else: raise RuntimeError("Unsupported audio-encoding: " + str(encoding)) - def start(self, sampling_rate_hz=None): + def start(self, sampling_rate_hz: int | None = None) -> None: if sampling_rate_hz is not None: self.sampling_rate_hz = sampling_rate_hz self.stop() @@ -34,10 +43,11 @@ def start(self, sampling_rate_hz=None): ) self.stream.start() - def append(self, audio): - self.stream.write(np.fromstring(audio, dtype=np.int16)) + def append(self, audio: bytes) -> None: + if self.stream is not None: + self.stream.write(np.frombuffer(audio, dtype=np.int16)) - def stop(self): + def stop(self) -> None: if self.stream is not None: self.stream.close() @@ -45,302 +55,495 @@ def stop(self): class AudioSaver: """Wave Saver for TTS""" - _buffer = None - _framerate = None - _nchannels = None - _sampwidth = None - _encoding = None + _buffer: bytearray + _framerate: int | None + _nchannels: int + _sampwidth: int + _encoding: Any - def __init__(self, sampling_frequency=None): + def __init__(self, sampling_frequency: int | None = None) -> None: self._buffer = bytearray() self._framerate = sampling_frequency self._nchannels = 1 self._sampwidth = 2 + self._encoding = None - def setEncoding(self, encoding): + def setEncoding(self, encoding: Any) 
-> None: self._encoding = encoding - def setFrameRate(self, sampling_frequency): + def setFrameRate(self, sampling_frequency: int) -> None: self._framerate = sampling_frequency - def append(self, audiodata): + def setSampWidth(self, sample_width: int) -> None: + self._sampwidth = sample_width + + def append(self, audiodata: bytes) -> None: self._buffer += audiodata - def clear(self): + def clear(self) -> None: self._buffer.clear() - def save(self, filename): - if self._encoding == techmo_tts_pb2.AudioEncoding.PCM16: + def save(self, filename: str) -> None: + if ( + (self._encoding == techmo_tts_pb2.AudioEncoding.PCM16) # type: ignore[attr-defined] + or (self._encoding == techmo_tts_pb2.AudioEncoding.A_LAW) # type: ignore[attr-defined] + or (self._encoding == techmo_tts_pb2.AudioEncoding.MU_LAW) # type: ignore[attr-defined] + ): if not self._framerate: raise RuntimeError("Sample rate has not been set") - with wave.open(filename, "w") as w: - params = ( - self._nchannels, - self._sampwidth, - self._framerate, - len(self._buffer), - "NONE", - "not compressed", - ) - w.setparams(params) - w.writeframes(self._buffer) + + if self._encoding == techmo_tts_pb2.AudioEncoding.MU_LAW: # type: ignore[attr-defined] + audio_format = int(AudioFormat.MU_LAW) + elif self._encoding == techmo_tts_pb2.AudioEncoding.A_LAW: # type: ignore[attr-defined] + audio_format = int(AudioFormat.A_LAW) + else: + audio_format = int(AudioFormat.PCM16) + + write_wave_file( + filename, + self._buffer, + self._framerate, + self._nchannels, + int(self._sampwidth), + audio_format, + ) else: - f = open(filename, "wb") - f.write(self._buffer) - f.close() + with open(filename, "wb") as f: + f.write(self._buffer) - def load(self, filename): + def load(self, filename: str) -> None: with wave.open(filename, "r") as wr: - self._buffer = wr.readframes(wr.getnframes()) + self._buffer = bytearray(wr.readframes(wr.getnframes())) - def isEqualTo(self, asv): + def isEqualTo(self, asv: AudioSaver) -> bool: return 
self._buffer == asv._buffer - def print(self): - if len(self._buffer) > 0: - header = struct.pack( - "<4sL4s4sLHHLLHH4sL", - b"RIFF", - 36 + len(self._buffer), - b"WAVE", - b"fmt ", - 16, - 0x0001, - self._nchannels, - self._framerate, - self._nchannels * self._framerate * self._sampwidth, - self._nchannels * self._sampwidth, - self._sampwidth * 8, - b"data", - len(self._buffer), - ) - sys.stdout._buffer.write(header + bytes(self.buffer)) - class GrpcRequestConfig: - _channel = None - _stub = None - _timeout = None - _metadata = None - - def __init__(self, service, tls_directory, grpc_timeout, session_id): - self._channel = create_channel(service, tls_directory) - self._stub = techmo_tts_pb2_grpc.TTSStub(self._channel) + _channel: grpc.Channel | None + _stub: Any + _timeout: float | None + _metadata: list[tuple[str, str]] + + def __init__( + self, + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int = 0, + ) -> None: + self._channel = create_channel( + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + ) + self._stub = techmo_tts_pb2_grpc.TTSStub(self._channel) # type: ignore[no-untyped-call] if grpc_timeout > 0: self._timeout = grpc_timeout / 1000 + else: + self._timeout = None self._metadata = [] if session_id: self._metadata = [("session_id", session_id)] - def get_channel(self): + def get_channel(self) -> grpc.Channel | None: return self._channel - def get_stub(self): + def get_stub(self) -> Any: return self._stub - def get_timeout(self): + def get_timeout(self) -> float | None: return self._timeout - def get_metadata(self): + def get_metadata(self) -> list[tuple[str, str]]: return self._metadata -def list_voices(args): +def print_service_version( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, 
+ tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, +) -> None: + rc = GrpcRequestConfig( + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, + ) + request = techmo_tts_pb2.GetServiceVersionRequest() # type: ignore[attr-defined] + + try: + stub = rc.get_stub() + response = stub.GetServiceVersion(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) + print(response) + except grpc.RpcError as e: + print_server_side_error(str(e)) + + +def print_resources_id( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, +) -> None: + rc = GrpcRequestConfig( + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, + ) + request = techmo_tts_pb2.GetResourcesIdRequest() # type: ignore[attr-defined] + try: + stub = rc.get_stub() + response = stub.GetResourcesId(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) + print(response) + except grpc.RpcError as e: + print_server_side_error(str(e)) + + +def list_voices( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + language_code: str, +) -> None: rc = GrpcRequestConfig( - service=args.service, - tls_directory=args.tls_directory, - grpc_timeout=args.grpc_timeout, - session_id=args.session_id, + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + 
grpc_timeout=grpc_timeout, ) - request = techmo_tts_pb2.ListVoicesRequest(language=args.language) + request = techmo_tts_pb2.ListVoicesRequest(language_code=language_code) # type: ignore[attr-defined] try: stub = rc.get_stub() - response = stub.ListVoices( - request, timeout=rc.get_timeout(), metadata=rc.get_metadata() - ) + response = stub.ListVoices(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) print("\nAvailable voices:\n") print(response) except grpc.RpcError as e: - print( - "[Server-side error] Received following RPC error from the TTS service:", - str(e), + print_server_side_error(str(e)) + + +def prepare_synthesis_config( + language_code: str, + voice_name: str, + voice_age: str, + voice_gender: str, + voice_variant: int, + speech_pitch: float, + speech_range: float, + speech_rate: float, + speech_stress: float, + speech_volume: float, +) -> Any: + synthesis_config = None + voice = None + prosodic_properties = None + + if (voice_name != "") or (voice_age != "") or (voice_gender != "") or (voice_variant != 1): + voice = create_voice( + voice_name=voice_name, + voice_gender=voice_gender, + voice_age=voice_age, + voice_variant=voice_variant, ) + if (speech_pitch != 1.0) or (speech_range != 1.0) or (speech_rate != 1.0) or (speech_stress != 1.0) or (speech_volume != 1.0): + prosodic_properties = techmo_tts_pb2.ProsodicProperties( # type: ignore[attr-defined] + pitch=speech_pitch, + range=speech_range, + rate=speech_rate, + stress=speech_stress, + volume=speech_volume, + ) -def synthesize(args, text): - audio_encoding = get_audio_encoding(args) - out_path = create_out_path(args, audio_encoding) - - channel = create_channel(args.service, args.tls_directory) - stub = techmo_tts_pb2_grpc.TTSStub(channel) - - config = techmo_tts_pb2.SynthesizeConfig( - language=args.language, - voice=create_voice(args), - prosodic_properties=techmo_tts_pb2.ProsodicProperties( - pitch=args.speech_pitch, - range=args.speech_range, - rate=args.speech_rate, - 
volume=args.speech_volume, - ), - audio_config=techmo_tts_pb2.AudioConfig( - audio_encoding=audio_encoding, - sampling_rate_hz=int(args.sample_rate), - ), + if (language_code != "") or (voice is not None) or (prosodic_properties is not None): + synthesis_config = techmo_tts_pb2.SynthesisConfig( # type: ignore[attr-defined] + language_code=language_code, + voice=voice, + prosodic_properties=prosodic_properties, + ) + if language_code == "": + synthesis_config.ClearField("language_code") + if voice is None: + synthesis_config.ClearField("voice") + if prosodic_properties is None: + synthesis_config.ClearField("prosodic_properties") + + return synthesis_config + + +def synthesize( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + audio_encoding: str, + sampling_rate: int, + max_frame_size: int, + language_code: str, + voice_name: str, + voice_age: str, + voice_gender: str, + voice_variant: int, + speech_pitch: float, + speech_range: float, + speech_rate: float, + speech_stress: float, + speech_volume: float, + play: bool, + response: str, + out_path: str, + text: str, +) -> None: + audio_encoding = get_audio_encoding(audio_encoding=audio_encoding) + out_path = create_out_path(out_path=out_path, audio_encoding=audio_encoding) + + channel = create_channel( + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + ) + stub = techmo_tts_pb2_grpc.TTSStub(channel) # type: ignore[no-untyped-call] + + synthesis_config = prepare_synthesis_config( + language_code=language_code, + voice_name=voice_name, + voice_age=voice_age, + voice_gender=voice_gender, + voice_variant=voice_variant, + speech_pitch=speech_pitch, + speech_range=speech_range, + speech_rate=speech_rate, + speech_stress=speech_stress, + speech_volume=speech_volume, ) - if 
args.phoneme_modifiers != "": - try: - phoneme_modifiers_array = ( - args.phoneme_modifiers[1:-1].replace("),", ");").split(";") - ) + output_config = None - for single_phoneme_modifiers_string in phoneme_modifiers_array: - single_phoneme_modifiers = single_phoneme_modifiers_string.replace( - " ", "" - )[1:-1].split(",") - config.phoneme_modifiers.append( - techmo_tts_pb2.PhonemeModifiers( - phoneme_index=int(single_phoneme_modifiers[0]), - new_pitch=float(single_phoneme_modifiers[1]), - new_duration=float(single_phoneme_modifiers[2]), - ) - ) - except Exception as e: - print( - "Error while parsing the list of phoneme modifiers:", - str(e), - "\nEnsure that format of provided phoneme modifiers list is correct: [(index1, pitch1, duration1), (index2, pitch2, duration2), ...]", - ) + if (audio_encoding != techmo_tts_pb2.AudioEncoding.PCM16) or (sampling_rate != 0) or (max_frame_size != 0): # type: ignore[attr-defined] + output_config = techmo_tts_pb2.OutputConfig( # type: ignore[attr-defined] + audio_encoding=audio_encoding, + sampling_rate_hz=sampling_rate, + max_frame_size=max_frame_size, + ) - request = techmo_tts_pb2.SynthesizeRequest(text=text, config=config) + request = techmo_tts_pb2.SynthesizeRequest(text=text, synthesis_config=synthesis_config, output_config=output_config) # type: ignore[attr-defined] - timeout = None - if args.grpc_timeout > 0: - timeout = args.grpc_timeout / 1000 # milliseconds to seconds - metadata = [] - if args.session_id: - metadata = [("session_id", args.session_id)] + if synthesis_config is None: + request.ClearField("synthesis_config") + if output_config is None: + request.ClearField("output_config") - audioPlayer = None - if args.play: - if args.sample_rate: - player_sampling_rate = int(args.sample_rate) - else: - player_sampling_rate = 8000 + timeout: float | None = None + if grpc_timeout > 0: + timeout = grpc_timeout / 1000 # milliseconds to seconds + metadata: list[tuple[str, str]] = [] + if session_id: + metadata = 
[("session_id", session_id)] - audioPlayer = AudioPlayer( - sampling_rate_hz=player_sampling_rate, encoding=args.audio_encoding - ) + audioPlayer: AudioPlayer | None = None + if play: + player_sampling_rate: int = sampling_rate if sampling_rate else 8000 + audioPlayer = AudioPlayer(sampling_rate_hz=player_sampling_rate, encoding=audio_encoding) audioSaver = AudioSaver() audioSaver.setEncoding(audio_encoding) + if (audio_encoding == techmo_tts_pb2.AudioEncoding.A_LAW) or (audio_encoding == techmo_tts_pb2.AudioEncoding.MU_LAW): # type: ignore[attr-defined] + audioSaver.setSampWidth(1) + try: - if args.response == "streaming": - internal_synthesize_streaming( - stub, request, timeout, metadata, audioSaver, audioPlayer - ) - elif args.response == "single": - internal_synthesize( - stub, request, timeout, metadata, audioSaver, audioPlayer - ) + if response == "streaming": + internal_synthesize_streaming(stub, request, timeout, metadata, audioSaver, audioPlayer) + elif response == "single": + internal_synthesize(stub, request, timeout, metadata, audioSaver, audioPlayer) else: - raise RuntimeError("Unsupported response type: " + args.response) + raise RuntimeError("Unsupported response type: " + response) audioSaver.save(out_path) except grpc.RpcError as e: - print( - "[Server-side error] Received following RPC error from the TTS service:", - str(e), - ) + print_server_side_error(str(e)) finally: - if args.play: + if audioPlayer is not None: audioPlayer.stop() + audioSaver.clear() -def create_channel(address, tls_directory): - if not tls_directory: - return grpc.insecure_channel(address) +def _read_file(path: str) -> bytes | None: + p = Path(path) + return p.read_bytes() if p.exists() else None - def read_file(path): - with open(path, "rb") as file: - return file.read() - return grpc.secure_channel( - address, - grpc.ssl_channel_credentials( - read_file(os.path.join(tls_directory, "ca.crt")), - read_file(os.path.join(tls_directory, "client.key")), - 
read_file(os.path.join(tls_directory, "client.crt")), - ), - ) +def create_channel( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, +) -> grpc.Channel: + ca_cert_file: bytes | None = None + cert_file: bytes | None = None + private_key_file: bytes | None = None + + if tls_dir: + ca_cert_file = _read_file(tls_dir + "/ca.crt") + private_key_file = _read_file(tls_dir + "/client.key") + cert_file = _read_file(tls_dir + "/client.crt") + + if tls_ca_cert_file: + ca_cert_file = _read_file(tls_ca_cert_file) + if tls_private_key_file: + private_key_file = _read_file(tls_private_key_file) -def create_out_path(args, audio_encoding): - out_path = args.out_path + if tls_cert_file: + cert_file = _read_file(tls_cert_file) + + if (ca_cert_file is None) and (cert_file is None) and (private_key_file is None) and (not tls): + return grpc.insecure_channel(service_address) + else: + return grpc.secure_channel( + service_address, + grpc.ssl_channel_credentials(ca_cert_file, private_key_file, cert_file), + ) + + +def create_out_path( + out_path: str, + audio_encoding: Any, +) -> str: if out_path == "": - if audio_encoding == techmo_tts_pb2.AudioEncoding.PCM16: - out_path = "TechmoTTS.wav" + if ( + (audio_encoding == techmo_tts_pb2.AudioEncoding.PCM16) # type: ignore[attr-defined] + or (audio_encoding == techmo_tts_pb2.AudioEncoding.A_LAW) # type: ignore[attr-defined] + or (audio_encoding == techmo_tts_pb2.AudioEncoding.MU_LAW) # type: ignore[attr-defined] + ): + out_path = "output.wav" + elif (audio_encoding == techmo_tts_pb2.AudioEncoding.OGG_VORBIS) or (audio_encoding == techmo_tts_pb2.AudioEncoding.OGG_OPUS): # type: ignore[attr-defined] + out_path = "output.ogg" else: - out_path = "TechmoTTS.ogg" - return os.path.join(out_path) - - -def create_voice(args): - if args.voice_name != "" or args.voice_gender != "" or args.voice_age != "": - gender = techmo_tts_pb2.Gender.GENDER_UNSPECIFIED - if 
args.voice_gender == "female": - gender = techmo_tts_pb2.Gender.FEMALE - elif args.voice_gender == "male": - gender = techmo_tts_pb2.Gender.MALE - elif args.voice_gender != "": - raise RuntimeError("Unsupported voice-gender: " + args.voice_gender) - - age = techmo_tts_pb2.Age.AGE_UNSPECIFIED - if args.voice_age == "adult": - age = techmo_tts_pb2.Age.ADULT - elif args.voice_age == "child": - age = techmo_tts_pb2.Age.CHILD - elif args.voice_age == "senile": - age = techmo_tts_pb2.Age.SENILE - elif args.voice_age != "": - raise RuntimeError("Unsupported voice-age: " + args.voice_age) - - return techmo_tts_pb2.Voice( - name=args.voice_name, gender=gender, age=age, variant=args.voice_variant - ) - else: - return None + raise RuntimeError("Unsupported audio encoding: " + str(audio_encoding)) + return out_path -def get_audio_encoding(args): - if args.audio_encoding == "pcm16": - return techmo_tts_pb2.AudioEncoding.PCM16 - elif args.audio_encoding == "ogg-vorbis": - return techmo_tts_pb2.AudioEncoding.OGG_VORBIS +def create_voice( + voice_name: str, + voice_gender: str, + voice_age: str, + voice_variant: int, +) -> Any: + try: + gender = None + age = None + if voice_gender == "female": + gender = techmo_tts_pb2.Gender.FEMALE # type: ignore[attr-defined] + elif voice_gender == "male": + gender = techmo_tts_pb2.Gender.MALE # type: ignore[attr-defined] + elif voice_gender != "": + raise RuntimeError("Unsupported voice-gender: " + voice_gender) + + if voice_age == "adult": + age = techmo_tts_pb2.Age.ADULT # type: ignore[attr-defined] + elif voice_age == "child": + age = techmo_tts_pb2.Age.CHILD # type: ignore[attr-defined] + elif voice_age == "senile": + age = techmo_tts_pb2.Age.SENILE # type: ignore[attr-defined] + elif voice_age != "": + raise RuntimeError("Unsupported voice-age: " + voice_age) + + return techmo_tts_pb2.Voice(name=voice_name, gender=gender, age=age, variant=voice_variant) # type: ignore[attr-defined] + except RuntimeError: + raise + except Exception as err: 
+ raise RuntimeError("Unable to create voice!") from err + + +def get_audio_encoding(audio_encoding: str) -> Any: + if audio_encoding == "pcm16": + return techmo_tts_pb2.AudioEncoding.PCM16 # type: ignore[attr-defined] + elif audio_encoding == "ogg-vorbis": + return techmo_tts_pb2.AudioEncoding.OGG_VORBIS # type: ignore[attr-defined] + elif audio_encoding == "ogg-opus": + return techmo_tts_pb2.AudioEncoding.OGG_OPUS # type: ignore[attr-defined] + elif audio_encoding == "a-law": + return techmo_tts_pb2.AudioEncoding.A_LAW # type: ignore[attr-defined] + elif audio_encoding == "mu-law": + return techmo_tts_pb2.AudioEncoding.MU_LAW # type: ignore[attr-defined] else: - raise RuntimeError("Unsupported audio-encoding: " + args.audio_encoding) + raise RuntimeError("Unsupported audio-encoding: " + audio_encoding) -def internal_synthesize(stub, request, timeout, metadata, audio_saver, audio_player): +def internal_synthesize( + stub: Any, + request: Any, + timeout: float | None, + metadata: list[tuple[str, str]], + audio_saver: AudioSaver, + audio_player: AudioPlayer | None, +) -> None: response = stub.Synthesize(request, timeout=timeout, metadata=metadata) + print_warnings(response.warnings) if audio_player is not None: - audio_player.start(sample_rate=response.sampling_rate_hz) + audio_player.start(sampling_rate_hz=response.sampling_rate_hz) audio_player.append(response.audio) audio_saver.setFrameRate(response.sampling_rate_hz) audio_saver.append(response.audio) def internal_synthesize_streaming( - stub, request, timeout, metadata, audio_saver, audio_player -): + stub: Any, + request: Any, + timeout: float | None, + metadata: list[tuple[str, str]], + audio_saver: AudioSaver, + audio_player: AudioPlayer | None, +) -> None: if audio_player is not None: audio_player.start() - for response in stub.SynthesizeStreaming( - request, timeout=timeout, metadata=metadata - ): + for response in stub.SynthesizeStreaming(request, timeout=timeout, metadata=metadata): + 
print_warnings(response.warnings) if audio_saver._framerate: if audio_saver._framerate != response.sampling_rate_hz: raise RuntimeError("Sample rate does not match previously received.") @@ -349,3 +552,22 @@ def internal_synthesize_streaming( if audio_player is not None: audio_player.append(response.audio) audio_saver.append(response.audio) + + +def print_warnings(warnings: Any) -> None: + if warnings: + print("The following warnings were encountered:") + for w in warnings: + print(w) + + +def print_server_side_error(e: str) -> None: + if "UNAVAILABLE" in e: + print("Unable to connect to the service! Check if the service-address and TLS settings are correct.") + elif "UNIMPLEMENTED" in e: + print("[Server-side error] Feature not implemented! \n Presumably the service being queried supports a different version of the API.") + else: + print( + "[Server-side error] Received following RPC error from the TTS service: ", + e, + ) diff --git a/tts_client_python/lexicons.py b/tts_client_python/lexicons.py index 4ceb10f..c3f5640 100644 --- a/tts_client_python/lexicons.py +++ b/tts_client_python/lexicons.py @@ -1,75 +1,116 @@ -from tts_client_python.proto import techmo_tts_pb2 as techmo_tts_pb2 -from tts_client_python.proto import techmo_tts_pb2_grpc as techmo_tts_pb2_grpc +from __future__ import annotations + +from pathlib import Path + import grpc -import os -from tts_client_python.general import GrpcRequestConfig -import lxml.etree as etree +from grpc import StatusCode +from tts_client_python.general import GrpcRequestConfig +from tts_client_python.proto import techmo_tts_pb2 as techmo_tts_pb2 -def delete_lexicon(args): +def delete_lexicon( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + lexicon_uri: str, +) -> None: rc = GrpcRequestConfig( - service=args.service, - tls_directory=args.tls_directory, - grpc_timeout=args.grpc_timeout, - 
session_id=args.session_id, + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, ) - request = techmo_tts_pb2.DeleteLexiconRequest(name=args.lexicon_to_delete) + request = techmo_tts_pb2.DeleteLexiconRequest(uri=lexicon_uri) # type: ignore[attr-defined] try: stub = rc.get_stub() - response = stub.DeleteLexicon( - request, timeout=rc.get_timeout(), metadata=rc.get_metadata() - ) - print("\nLexicon: ", args.lexicon_to_delete, " has been deleted\n") + stub.DeleteLexicon(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) + print("\nLexicon: ", lexicon_uri, " has been deleted\n") except grpc.RpcError as e: - print( - "[Server-side error] Received following RPC error from the TTS service:", - str(e), - ) - - -def get_lexicon(args): - + if e.code() == StatusCode.NOT_FOUND: + print(f"[NOT FOUND] Lexicon '{lexicon_uri}' was not found. 
Use --list-lexicons to find available lexicons.") + else: + print( + "[Server-side error] Received following RPC error from the TTS service:", + str(e), + ) + + +def get_lexicon( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + lexicon_uri: str, + output_path: str, +) -> None: rc = GrpcRequestConfig( - service=args.service, - tls_directory=args.tls_directory, - grpc_timeout=args.grpc_timeout, - session_id=args.session_id, + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, ) - request = techmo_tts_pb2.GetLexiconRequest(name=args.lexicon_to_get) + request = techmo_tts_pb2.GetLexiconRequest(uri=lexicon_uri) # type: ignore[attr-defined] try: stub = rc.get_stub() - response = stub.GetLexicon( - request, timeout=rc.get_timeout(), metadata=rc.get_metadata() - ) - xml_parser = etree.XMLParser(remove_blank_text=True, recover=True) - x = etree.fromstring(response.content, parser=xml_parser) - print("\n---", args.lexicon_to_get, "---\n") - print(etree.tostring(x, pretty_print=True).decode()) + response = stub.GetLexicon(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) + with open(output_path, "w") as file: + file.write(response.content) except grpc.RpcError as e: - print( - "[Server-side error] Received following RPC error from the TTS service:", - str(e), - ) - - -def list_lexicons(args): - + if e.code() == StatusCode.NOT_FOUND: + print(f"[NOT FOUND] Lexicon '{lexicon_uri}' was not found. 
Use --list-lexicons to find available lexicons.") + else: + print( + "[Server-side error] Received following RPC error from the TTS service:", + str(e), + ) + + +def list_lexicons( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + language_code: str, +) -> None: rc = GrpcRequestConfig( - service=args.service, - tls_directory=args.tls_directory, - grpc_timeout=args.grpc_timeout, - session_id=args.session_id, + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, ) - request = techmo_tts_pb2.ListLexiconsRequest(language=args.language) + request = techmo_tts_pb2.ListLexiconsRequest(language_code=language_code) # type: ignore[attr-defined] try: stub = rc.get_stub() - response = stub.ListLexicons( - request, timeout=rc.get_timeout(), metadata=rc.get_metadata() - ) + response = stub.ListLexicons(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) print("\nAvailable lexicons:\n") print(response) except grpc.RpcError as e: @@ -79,25 +120,55 @@ def list_lexicons(args): ) -def put_lexicon(args): - +def put_lexicon( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + lexicon_uri: str, + lexicon_path: str, + outside_lookup_behaviour: str, +) -> None: rc = GrpcRequestConfig( - service=args.service, - tls_directory=args.tls_directory, - grpc_timeout=args.grpc_timeout, - session_id=args.session_id, + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, ) - request = 
techmo_tts_pb2.PutLexiconRequest( - name=args.put_lexicon[0], content=args.put_lexicon[1] + lexicon_content = Path(lexicon_path).read_text() + + outside_lookup = None + if outside_lookup_behaviour == "allowed": + outside_lookup = techmo_tts_pb2.OutsideLookupBehaviour.ALLOWED # type: ignore[attr-defined] + elif outside_lookup_behaviour == "disallowed": + outside_lookup = techmo_tts_pb2.OutsideLookupBehaviour.DISALLOWED # type: ignore[attr-defined] + else: + raise RuntimeError("Illegal value for OUTSIDE_LOOKUP_BEHAVIOUR") + + request = techmo_tts_pb2.PutLexiconRequest( # type: ignore[attr-defined] + uri=lexicon_uri, + content=lexicon_content, + outside_lookup_behaviour=outside_lookup, ) try: stub = rc.get_stub() stub.PutLexicon(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) - print("\nLexicon: ", args.put_lexicon[0], " has been added\n") + print("\nLexicon: ", lexicon_uri, " has been added\n") except grpc.RpcError as e: - print( - "[Server-side error] Received following RPC error from the TTS service:", - str(e), - ) + if e.code() == StatusCode.NOT_FOUND: + print(f"[NOT FOUND] Lexicon '{lexicon_uri}' was not found. 
Use --list-lexicons to find available lexicons.") + else: + print( + "[Server-side error] Received following RPC error from the TTS service:", + str(e), + ) diff --git a/tts_client_python/recordings.py b/tts_client_python/recordings.py index e22bf80..3c5c798 100644 --- a/tts_client_python/recordings.py +++ b/tts_client_python/recordings.py @@ -1,29 +1,51 @@ -from tts_client_python.proto import techmo_tts_pb2 as techmo_tts_pb2 -from tts_client_python.proto import techmo_tts_pb2_grpc as techmo_tts_pb2_grpc -import grpc -import os +from __future__ import annotations + import wave -from tts_client_python.general import GrpcRequestConfig +import grpc + +from tts_client_python.general import GrpcRequestConfig +from tts_client_python.proto import techmo_tts_pb2 as techmo_tts_pb2 +from tts_client_python.wave_utils import AudioFormat, write_wave_file -def delete_recording(args): +def delete_recording( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + language_code: str, + voice_name: str, + voice_variant: int, + recording_key: str, +) -> None: rc = GrpcRequestConfig( - service=args.service, - tls_directory=args.tls_directory, - grpc_timeout=args.grpc_timeout, - session_id=args.session_id, + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, ) - request = techmo_tts_pb2.DeleteRecordingRequest( - voice_name=args.delete_recording[0], recording_key=args.delete_recording[1] + request = techmo_tts_pb2.DeleteRecordingRequest( # type: ignore[attr-defined] + voice_profile=techmo_tts_pb2.VoiceProfile( # type: ignore[attr-defined] + voice_name=voice_name, + voice_variant=voice_variant, + language_code=language_code, + ), + recording_key=recording_key, ) try: stub = rc.get_stub() - response 
= stub.DeleteRecording( - request, timeout=rc.get_timeout(), metadata=rc.get_metadata() - ) - print("\nRecording: ", args.delete_recording[1], " has been deleted\n") + stub.DeleteRecording(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) + print("\nRecording: ", recording_key, " has been deleted\n") except grpc.RpcError as e: print( "[Server-side error] Received following RPC error from the TTS service:", @@ -31,60 +53,96 @@ def delete_recording(args): ) -def get_recording(args): - +def get_recording( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + language_code: str, + voice_name: str, + voice_variant: int, + recording_key: str, + output_path: str, +) -> None: rc = GrpcRequestConfig( - service=args.service, - tls_directory=args.tls_directory, - grpc_timeout=args.grpc_timeout, - session_id=args.session_id, + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, ) - output_path = "" - if args.get_recording[2] != "": - output_path = args.get_recording[2] - else: - output_path = args.get_recording[1] + ".wav" + out_path = output_path if output_path != "" else recording_key + ".wav" - request = techmo_tts_pb2.GetRecordingRequest( - voice_name=args.get_recording[0], recording_key=args.get_recording[1] + request = techmo_tts_pb2.GetRecordingRequest( # type: ignore[attr-defined] + voice_profile=techmo_tts_pb2.VoiceProfile( # type: ignore[attr-defined] + voice_name=voice_name, + voice_variant=voice_variant, + language_code=language_code, + ), + recording_key=recording_key, ) - try: stub = rc.get_stub() - response = stub.GetRecording( - request, timeout=rc.get_timeout(), metadata=rc.get_metadata() - ) + response = stub.GetRecording(request, timeout=rc.get_timeout(), 
metadata=rc.get_metadata()) except grpc.RpcError as e: print( "[Server-side error] Received following RPC error from the TTS service:", str(e), ) + return - wave_write = wave.open(output_path, "wb") - wave_write.setnchannels(1) - wave_write.setsampwidth(2) - wave_write.setframerate(response.sampling_rate_hz) - wave_write.writeframes(response.content) - wave_write.close() - + write_wave_file( + out_path, + response.content, + response.sampling_rate_hz, + 1, + 2, + int(AudioFormat.PCM16), + ) -def list_recordings(args): +def list_recordings( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + language_code: str, + voice_name: str, + voice_variant: int, +) -> None: rc = GrpcRequestConfig( - service=args.service, - tls_directory=args.tls_directory, - grpc_timeout=args.grpc_timeout, - session_id=args.session_id, + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, + ) + request = techmo_tts_pb2.ListRecordingsRequest( # type: ignore[attr-defined] + voice_profile=techmo_tts_pb2.VoiceProfile( # type: ignore[attr-defined] + voice_name=voice_name, + voice_variant=voice_variant, + language_code=language_code, + ), ) - voice_name = args.voice_to_list_recordings_for - request = techmo_tts_pb2.ListRecordingsRequest(voice_name=voice_name) try: stub = rc.get_stub() - response = stub.ListRecordings( - request, timeout=rc.get_timeout(), metadata=rc.get_metadata() - ) + response = stub.ListRecordings(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) print('\nAvailable recording keys for the voice "' + voice_name + '":\n') print(*response.keys, sep="\n") except grpc.RpcError as e: @@ -94,45 +152,95 @@ def list_recordings(args): ) -def put_recording(args): - +def list_sound_icons( 
+ service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + language_code: str, + voice_name: str, + voice_variant: int, +) -> None: rc = GrpcRequestConfig( - service=args.service, - tls_directory=args.tls_directory, - grpc_timeout=args.grpc_timeout, - session_id=args.session_id, + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, + ) + request = techmo_tts_pb2.ListSoundIconsRequest( # type: ignore[attr-defined] + voice_profile=techmo_tts_pb2.VoiceProfile( # type: ignore[attr-defined] + voice_name=voice_name, + voice_variant=voice_variant, + language_code=language_code, + ), ) - audio_path = args.put_recording[2] - wave_read = wave.open(audio_path, "rb") - channels = wave_read.getnchannels() - sample_width = wave_read.getsampwidth() - sampling_rate = wave_read.getframerate() - - if channels != 1: - raise ValueError( - "Only mono waves are allowed. {} contains: {} channels".format( - audio_path, channels - ) + try: + stub = rc.get_stub() + response = stub.ListSoundIcons(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) + print('\nAvailable sound icons keys for the voice "' + voice_name + '":\n') + print(*response.keys, sep="\n") + except grpc.RpcError as e: + print( + "[Server-side error] Received following RPC error from the TTS service:", + str(e), ) - if sample_width != 2: - raise ValueError( - "Only 16bit samples are allowed. 
{} has: {} bit samples".format( - audio_path, sample_width * 8 - ) - ) - audio_content_array = bytearray() - for i in range(wave_read.getnframes()): - audio_content_array.extend(wave_read.readframes(i)) +def put_recording( + service_address: str, + tls: bool, + tls_dir: str, + tls_ca_cert_file: str, + tls_cert_file: str, + tls_private_key_file: str, + session_id: str, + grpc_timeout: int, + language_code: str, + voice_name: str, + voice_variant: int, + recording_key: str, + audio_path: str, +) -> None: + rc = GrpcRequestConfig( + service_address=service_address, + tls=tls, + tls_dir=tls_dir, + tls_ca_cert_file=tls_ca_cert_file, + tls_cert_file=tls_cert_file, + tls_private_key_file=tls_private_key_file, + session_id=session_id, + grpc_timeout=grpc_timeout, + ) + + with wave.open(audio_path, "rb") as wave_read: + channels = wave_read.getnchannels() + sample_width = wave_read.getsampwidth() + sampling_rate = wave_read.getframerate() + + if channels != 1: + raise ValueError(f"Only mono waves are allowed. {audio_path} contains: {channels} channels") + + if sample_width != 2: + raise ValueError(f"Only 16bit samples are allowed. 
{audio_path} has: {sample_width * 8} bit samples") - wave_read.close() - audio_content = bytes(audio_content_array) + audio_content = bytes(wave_read.readframes(wave_read.getnframes())) - request = techmo_tts_pb2.PutRecordingRequest( - voice_name=args.put_recording[0], - recording_key=args.put_recording[1], + request = techmo_tts_pb2.PutRecordingRequest( # type: ignore[attr-defined] + voice_profile=techmo_tts_pb2.VoiceProfile( # type: ignore[attr-defined] + voice_name=voice_name, + voice_variant=voice_variant, + language_code=language_code, + ), + recording_key=recording_key, sampling_rate_hz=sampling_rate, content=audio_content, ) @@ -140,7 +248,7 @@ def put_recording(args): try: stub = rc.get_stub() stub.PutRecording(request, timeout=rc.get_timeout(), metadata=rc.get_metadata()) - print("\nRecording: ", args.put_recording[1], " has been added\n") + print("\nRecording: ", recording_key, " has been added\n") except grpc.RpcError as e: print( "[Server-side error] Received following RPC error from the TTS service:", diff --git a/tts_client_python/tts_client.py b/tts_client_python/tts_client.py index e475854..2a80568 100644 --- a/tts_client_python/tts_client.py +++ b/tts_client_python/tts_client.py @@ -1,317 +1,883 @@ -from argparse import ArgumentParser +from __future__ import annotations + +import argparse import codecs -from tts_client_python.VERSION import TTS_CLIENT_PYTHON_VERSION +import os +import re +import sys +import textwrap +from collections.abc import Sequence +from typing import Any + import tts_client_python.general as general import tts_client_python.lexicons as lexicons import tts_client_python.recordings as recordings +from tts_client_python.VERSION import TTS_CLIENT_PYTHON_VERSION + +legal_header = textwrap.dedent( + f""" + Techmo TTS Client, version {TTS_CLIENT_PYTHON_VERSION} + Copyright (C) 2026 Techmo sp. z o.o. 
+ """ +) + + +def check_voice_parameters(voice_name: str, language_code: str) -> None: + if voice_name == "": + print("No voice name provided!") + sys.exit(1) + if language_code == "": + print("No voice language_code provided!") + sys.exit(1) + + +class Once(argparse.Action): + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: str | Sequence[Any] | None, + option_string: str | None = None, + ) -> None: + if getattr(namespace, self.dest, self.default) is not self.default: + parser.error("argument {}: allowed once".format("/".join(self.option_strings))) + setattr(namespace, self.dest, values) + + +def ensure_int(value: str) -> int: + try: + int_value = int(value) + except ValueError as err: + raise argparse.ArgumentTypeError(f"invalid int value: '{value}'") from err + else: + return int_value + + +def positive_int(value: str) -> int: + int_value = ensure_int(value) + if int_value <= 0: + raise argparse.ArgumentTypeError( + f"must be greater than 0: '{value}'", + ) + return int_value + + +def unsigned_int(value: str) -> int: + int_value = ensure_int(value) + if int_value < 0: + raise argparse.ArgumentTypeError( + f"must be greater than or equal to 0: '{value}'", + ) + return int_value + + +def valid_tls_dir(value: str) -> str: + if not os.path.isdir(value): + raise argparse.ArgumentTypeError(f"Invalid directory path: '{value}'") + + required_files = ["client.crt", "client.key", "ca.crt"] + missing_files = [f for f in required_files if not os.path.isfile(os.path.join(value, f))] + if missing_files: + raise argparse.ArgumentTypeError(f"There are missing files in tls-dir {value}: {', '.join(missing_files)}") + return value + +def valid_session_id(value: str | None) -> str: + if value is None: + return "" + if not re.match(r"^[0-9a-zA-Z_-]+$", value): + raise argparse.ArgumentTypeError(f"Invalid session ID format: '{value}'") + if len(value) >= 64: + raise argparse.ArgumentTypeError("Session ID is too long (must be shorter 
than 64 characters)") + return value -def main(): + +def valid_service_address(value: str) -> str: + if not re.match(r"^([a-zA-Z0-9.-]+):([0-9]{1,5})$", value): + raise argparse.ArgumentTypeError(f"Invalid service address format: '{value}'. Pass 'address:port'") + return value + + +def main() -> None: print("Techmo TTS gRPC client " + TTS_CLIENT_PYTHON_VERSION) - parser = ArgumentParser() - parser.add_argument( + parser = argparse.ArgumentParser( + allow_abbrev=False, + description=legal_header, + add_help=False, + formatter_class=argparse.RawTextHelpFormatter, + ) + arguments_list = parser.add_argument_group("DESCRIPTION") + + arguments_list.add_argument( + "-h", + "--help", + action="help", + help=textwrap.dedent( + """\ + Shows this help message. + """ + ), + ) + arguments_list.add_argument( + "--print-service-version", + dest="print_service_version", + action="store_true", + default=False, + help=textwrap.dedent( + """\ + Shows the version of the specified TTS service. + """ + ), + ) + arguments_list.add_argument( + "--print-resources-id", + dest="print_resources_id", + action="store_true", + default=False, + help=textwrap.dedent( + """\ + Prints the identification string of the resources used by the service. + """ + ), + ) + arguments_list.add_argument( "-s", "--service-address", - dest="service", metavar="IP:PORT", required=True, - help="An IP address and port (address:port) of a service the client connects to.", - type=str, + action=Once, + type=valid_service_address, + help=textwrap.dedent( + """\ + An IP address and a port (address:port) of a service the client should connect to. + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "-t", "--text", dest="text", metavar="TEXT", - default="Polski tekst do syntezy", - help="A text to be synthesized.", type=str, + default="", + help=textwrap.dedent( + """\ + A text to be synthesized. Each synthesis request has to provide either the option `--text` + or `--input-path` (input from a file). 
+ """ + ), ) - parser.add_argument( + arguments_list.add_argument( "-i", - "--input-text-file", + "--input-path", dest="inputfile", metavar="INPUT_FILE", - default="", - help="A file with text to be synthesized.", + action=Once, type=str, + default="", + help=textwrap.dedent( + """\ + A path to the file with text to be synthesized. Each synthesis request has to provide either + the option `--text` or `--input-path` (input from a file). + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "-o", "--out-path", dest="out_path", metavar="OUT_PATH", + action=Once, + type=str, default="", - help="A path to the output wave file with synthesized audio content.", + help=textwrap.dedent( + """\ + A path to the output audio file with synthesized speech content. + """ + ), + ) + arguments_list.add_argument( + "-l", + "--language-code", + dest="language_code", + metavar="LANGUAGE_CODE", + action=Once, type=str, + default="", + help=textwrap.dedent( + """\ + ISO 639-1 language code of the phrase to be synthesized (optional, can be overridden by SSML). + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "-r", "--response", dest="response", metavar="RESPONSE_TYPE", + action=Once, + type=str, default="streaming", - help='"streaming" or "single", calls the streaming (default) or non-streaming version of Synthesize.', + choices=["streaming", "single"], + help=textwrap.dedent( + """\ + 'streaming' or 'single', calls the streaming (default) or non-streaming version of Synthesize call. + """ + ), + ) + arguments_list.add_argument( + "--tls", + action="store_true", + help=textwrap.dedent( + """\ + Enables simple one-way TLS encryption, using root certificates retrieved from a default location + chosen by gRPC runtime. Ignored if used along with other '--tls-*' options. 
+ """ + ), + ) + arguments_list.add_argument( + "--tls-dir", + dest="tls_dir", + metavar="arg", + action=Once, + type=valid_tls_dir, + help=textwrap.dedent( + """\ + A path to a directory containing TLS credential files. + The encryption method depends on the directory contents: + - ca.crt - one-way TLS with server authentication using x509 CA Certificate + - client.crt + client.key - mutual TLS + - client.crt + client.key + ca.crt - mutual TLS with server authentication + using x509 CA Certificate. + The credencial files can alternatively be provided using the options: + '--tls-ca-cert-file', '--tls-cert-file', '--tls-private-key-file'. + """ + ), + ) + arguments_list.add_argument( + "--tls-ca-cert-file", + dest="tls_ca_cert_file", + metavar="arg", + action=Once, + type=str, + help=textwrap.dedent( + """\ + A path to the file containing x509 CA Certificate used for server authentication + (with intermediate CA certs, if any, concatenated after CA cert). + """ + ), + ) + arguments_list.add_argument( + "--tls-cert-file", + dest="tls_cert_file", + metavar="arg", + action=Once, + type=str, + help=textwrap.dedent( + """\ + A path to file containing x509 Certificate used for client authentication. This option + must be used along with '--tls-private-key-file'. When these two options are used, + mutual TLS is enabled. Additionally the '--tls-ca-cert-file' option can be used to select + x509 CA Certificate for server authentication. + """ + ), + ) + arguments_list.add_argument( + "--tls-private-key-file", + dest="tls_private_key_file", + metavar="arg", + action=Once, type=str, + help=textwrap.dedent( + """\ + A path to the file containing x509 private key matching tls-cert-file. This option + must be used along with '--tls-cert-file'. When these two options are used, + mutual TLS is enabled. Additionally the '--tls-ca-cert-file' option can be used to select + x509 CA Certificate for server authentication. 
+ """ + ), ) - parser.add_argument( - "--sp", + arguments_list.add_argument( + "--play", + dest="play", + action="store_true", + default=False, + help=textwrap.dedent( + """\ + Plays synthesized audio. Works only with pcm16 (default) encoding. + """ + ), + ) + arguments_list.add_argument( + "--session-id", + dest="session_id", + metavar="SESSION_ID", + action=Once, + type=valid_session_id, + default=None, + help=textwrap.dedent( + """\ + A session ID to be passed to the service. If not specified, the service generates + a default session ID based on the timestamp of the request. + """ + ), + ) + arguments_list.add_argument( + "--grpc-timeout", + dest="grpc_timeout", + metavar="GRPC_TIMEOUT", + action=Once, + type=unsigned_int, + default=0, + help=textwrap.dedent( + """\ + A timeout in milliseconds used to set gRPC deadline - how long the client is willing to wait + for a reply from the server (optional). + """ + ), + ) + arguments_list.add_argument( + "--sampling-rate-hz", + dest="sampling_rate", + metavar="SAMPLING_RATE", + action=Once, + type=unsigned_int, + default=0, + help=textwrap.dedent( + """\ + A sampling rate in Hz of synthesized audio. Set to 0 (default) to use voice's native sampling rate. + """ + ), + ) + arguments_list.add_argument( + "--ae", + "--audio-encoding", + dest="audio_encoding", + metavar="ENCODING", + action=Once, + type=str, + default="pcm16", + choices=["pcm16", "ogg-vorbis", "ogg-opus", "a-law", "mu-law"], + help=textwrap.dedent( + """\ + An encoding of the output audio, pcm16 (default), 'ogg-vorbis', 'ogg-opus', 'a-law', or 'mu-law'. + """ + ), + ) + arguments_list.add_argument( + "--max-frame-size", + dest="max_frame_size", + metavar="MAX_FRAME_SIZE", + action=Once, + type=unsigned_int, + default=0, + help=textwrap.dedent( + """\ + Maximum frame size for RTF (Real Time Factor) throttling. Optional, 0 (default) means that + RTF throttling is disabled. 
+ """ + ), + ) + arguments_list.add_argument( "--speech-pitch", dest="speech_pitch", metavar="SPEECH_PITCH", - default=1.0, - help="Allows adjusting the default pitch of the synthesized speech (optional, can be overridden by SSML).", + action=Once, type=float, + default=1.0, + help=textwrap.dedent( + """\ + Allows adjusting the default pitch of the synthesized speech (optional, can be overridden by SSML). + """ + ), ) - parser.add_argument( - "--sr", + arguments_list.add_argument( "--speech-range", dest="speech_range", metavar="SPEECH_RANGE", - default=1.0, - help="Allows adjusting the default range of the synthesized speech (optional, can be overridden by SSML).", + action=Once, type=float, + default=1.0, + help=textwrap.dedent( + """\ + Allows adjusting the default range of the synthesized speech (optional, can be overridden by SSML). + """ + ), ) - parser.add_argument( - "--ss", + arguments_list.add_argument( "--speech-rate", dest="speech_rate", metavar="SPEECH_RATE", + action=Once, + type=float, default=1.0, - help="Allows adjusting the default rate (speed) of the synthesized speech (optional, can be overridden by SSML).", + help=textwrap.dedent( + """\ + Allows adjusting the default rate (speed) of the synthesized speech (optional, can be overridden + by SSML). + """ + ), + ) + arguments_list.add_argument( + "--speech-stress", + dest="speech_stress", + metavar="SPEECH_STRESS", + action=Once, type=float, + default=1.0, + help=textwrap.dedent( + """\ + Allows adjusting the default stress of the synthesized speech (optional, can be overridden by SSML). 
+ """ + ), ) - parser.add_argument( - "--sv", + arguments_list.add_argument( "--speech-volume", dest="speech_volume", metavar="SPEECH_VOLUME", - default=1.0, - help="Allows adjusting the default volume of the synthesized speech (optional, can be overridden by SSML).", + action=Once, type=float, + default=1.0, + help=textwrap.dedent( + """\ + Allows adjusting the default volume of the synthesized speech (optional, can be overridden by SSML). + """ + ), ) - parser.add_argument( - "--sample-rate", - dest="sample_rate", - metavar="SAMPLE_RATE", - default=0, - help="A sample rate in Hz of synthesized audio. Set to 0 (default) to use voice's original sample rate.", - type=int, - ) - parser.add_argument( - "--ae", - "--audio-encoding", - dest="audio_encoding", - metavar="ENCODING", - default="pcm16", - help="An encoding of the output audio, pcm16 (default) or ogg-vorbis.", - type=str, - ) - parser.add_argument( - "--play", - dest="play", - default=False, - action="store_true", - help="Play synthesized audio. Works only with pcm16 (default) encoding.", - ) - parser.add_argument( - "--phoneme-modifiers", - dest="phoneme_modifiers", - metavar="PHONEME_MODIFIERS", - default="", - help="An array of additional phoneme modifiers: [(index1, pitch1, duration1), ...] for fine-tuning the output audio (optional).", - type=str, - ) - parser.add_argument( - "--session-id", - dest="session_id", - metavar="SESSION_ID", - default="", - help="A session ID to be passed to the service. If not specified, the service generates a default session ID.", - type=str, - ) - parser.add_argument( - "--tls-dir", - dest="tls_directory", - metavar="TLS_DIR", - default="", - help="If set to a path with SSL/TLS credential files (client.crt, client.key, ca.crt), use SSL/TLS authentication. 
Otherwise use insecure channel (default).", - type=str, - ) - parser.add_argument( - "--grpc-timeout", - dest="grpc_timeout", - metavar="GRPC_TIMEOUT", - default=0, - help="A timeout in milliseconds used to set gRPC deadline - how long the client is willing to wait for a reply from the server (optional).", - type=int, - ) - parser.add_argument( + arguments_list.add_argument( "--list-voices", dest="list_voices", action="store_true", default=False, - help="Lists all available voices.", - ) - parser.add_argument( - "-l", - "--language", - dest="language", - metavar="LANGUAGE", - default="", - help="Language (ISO 639-1 code) of the voice to be used (optional, can be overridden by SSML).", - type=str, + help=textwrap.dedent( + """\ + Lists all available voices. + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--vn", "--voice-name", dest="voice_name", metavar="VOICE_NAME", - default="", - help="A name of the voice to be used (optional, can be overridden by SSML).", + action=Once, type=str, + default="", + help=textwrap.dedent( + """\ + A name of the voice used to synthesize the phrase (optional, can be overridden by SSML). + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--vg", "--voice-gender", dest="voice_gender", metavar="VOICE_GENDER", - default="", - help="A gender of the voice to be used. Allowed values: 'female', 'male' (optional, can be overridden by SSML).", + action=Once, type=str, + default="", + help=textwrap.dedent( + """\ + A gender of the voice - 'female' or 'male' (optional, can be overridden by SSML). + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--va", "--voice-age", dest="voice_age", metavar="VOICE_AGE", - default="", - help="An age of the voice to be used. 
Allowed values: 'adult', 'child', 'senile' (optional, can be overridden by SSML).", + action=Once, type=str, + default="", + choices=["adult", "child", "senile"], + help=textwrap.dedent( + """\ + An age of the voice - 'adult', 'child', or 'senile' (optional, can be overridden by SSML). + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--voice-variant", dest="voice_variant", metavar="VOICE_VARIANT", - default=0, - help="A variant of the selected voice - unsigned integer (optional, can be overriden by SSML).", - type=int, + action=Once, + type=positive_int, + default=1, + help=textwrap.dedent( + """\ + A variant of the voice - positive integer (optional, can be overridden by SSML). Default value is 1. + """ + ), + ) + arguments_list.add_argument( + "--list-sound-icons", + dest="list_sound_icons", + action="store_true", + default=False, + help=textwrap.dedent( + """\ + Lists all available sound icons for the requested voice and language. This request requires also + arguments: '--voice-name' and '--language-code', and may optionally specify '--voice-variant' + (if not specified, the default variant (1) is used). + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--list-recordings", - dest="voice_to_list_recordings_for", - metavar="VOICE_NAME", - default="", - help="Lists all recording keys for the requested voice.", - type=str, + dest="list_recordings", + action="store_true", + default=False, + help=textwrap.dedent( + """\ + Lists all available recordings for the requested voice and language. This request requires also + arguments: '--voice-name' and '--language-code', and may optionally specify '--voice-variant' + (if not specified, the default variant (1) is used). 
+ """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--get-recording", - nargs=3, - metavar=("VOICE_NAME", "RECORDING_KEY", "OUTPUT_PATH"), - help="Sends back the recording with the requested key for the requested voice in the linear PCM16 format.", + nargs=2, + metavar=("RECORDING_KEY", "OUTPUT_PATH"), + action=Once, type=str, + help=textwrap.dedent( + """\ + Sends back the recording with the requested key for the requested voice in the linear PCM16 format. + This request requires also arguments: '--voice-name' and '--language-code', and may optionally specify + '--voice-variant' (if not specified, the default variant (1) is used). + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--put-recording", - nargs=3, - metavar=("VOICE_NAME", "RECORDING_KEY", "AUDIO_PATH"), - help="Adds a new recording with the requested key for the requested voice, or overwrites the existing one if there is already such a key defined. The recording has to be PCM16 WAV audio.", + nargs=2, + metavar=("RECORDING_KEY", "AUDIO_PATH"), + action=Once, type=str, + help=textwrap.dedent( + """\ + Adds a recording to the list of recordings of requested voice and language. This request requires also + arguments: '--voice-name' and '--language-code', and may optionally specify '--voice-variant' + (if not specified, the default variant (1) is used). + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--delete-recording", - nargs=2, - metavar=("VOICE_NAME", "RECORDING_KEY"), - help="Removes the recording with the requested key from the list of recordings of the requested voice.", + dest="delete_recording_key", + metavar="RECORDING_KEY", + action=Once, type=str, + help=textwrap.dedent( + """\ + Deletes the recording from the list of recordings of requested voice and language. This request requires + also arguments: '--voice-name' and '--language-code', and may optionally specify '--voice-variant' + (if not specified, the default variant (1) is used). 
+ """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--list-lexicons", dest="list_lexicons", action="store_true", default=False, - help="Lists all available lexicons.", + help=textwrap.dedent( + """\ + Lists all available pronunciation lexicons. + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--get-lexicon", - dest="lexicon_to_get", - metavar="LEXICON_NAME", - default="", - help="Sends back the content of the lexicon with the requested name.", + nargs=2, + metavar=("LEXICON_URI", "OUTPUT_PATH"), + action=Once, type=str, + help=textwrap.dedent( + """\ + Saves the content of the lexicon from the service-wide list of lexicons. + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--put-lexicon", - nargs=2, - metavar=("LEXICON_NAME", "LEXICON_CONTENT"), - help="Adds a new lexicon with the requested name or overwrites the existing one if there is already a lexicon with such name. Content of the lexicon shall comply to W3C Pronunciation Lexicon Specification 1.0 (https://www.w3.org/TR/pronunciation-lexicon/).", + nargs=3, + metavar=("LEXICON_URI", "LEXICON_PATH", "OUTSIDE_LOOKUP_BEHAVIOUR"), + action=Once, type=str, + help=textwrap.dedent( + """\ + Adds lexicon to the service-wide list of lexicons. + - LEXICON_URI - a custom string identifying a given lexicon at the service level. + - LEXICON_PATH - path to the lexicon file. + - OUTSIDE_LOOKUP_BEHAVIOUR - 'allowed' (the service uses the lexicon outside the SSML tag) or + 'disallowed'. + """ + ), ) - parser.add_argument( + arguments_list.add_argument( "--delete-lexicon", dest="lexicon_to_delete", - metavar="LEXICON_NAME", - default="", - help="Removes the lexicon with the requested name.", + metavar="LEXICON_URI", + action=Once, type=str, + default="", + help=textwrap.dedent( + """\ + Deletes lexicon from the service-wide list of lexicons. 
+ """ + ), ) # Parse and validate options args = parser.parse_args() # Check if service address and port are provided - if len(args.service) == 0: + if len(args.service_address) == 0: raise RuntimeError("No service address and port provided.") + if (args.tls) and ( + (args.tls_dir is not None) or (args.tls_ca_cert_file is not None) or (args.tls_private_key_file is not None) or (args.tls_cert_file is not None) + ): + raise ValueError("--tls flag cannot be used along with the other --tls-* options.") + + if (args.tls_private_key_file is not None) and (args.tls_cert_file is None): + raise ValueError("The '--tls-private-key-file' option requires the use of '--tls-cert-file'.") + + if (args.tls_cert_file is not None) and (args.tls_private_key_file is None): + raise ValueError("The '--tls-cert-file' option requires the use of '--tls-private-key-file'.") + + if args.print_service_version: + general.print_service_version( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + ) + sys.exit(0) + + if args.print_resources_id: + general.print_resources_id( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + ) + sys.exit(0) + if args.list_voices: - general.list_voices(args) - return + general.list_voices( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + language_code=args.language_code, + ) + sys.exit(0) - if 
args.list_lexicons: - lexicons.list_lexicons(args) - return + if args.list_sound_icons: + check_voice_parameters(args.voice_name, args.language_code) + recordings.list_sound_icons( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + language_code=args.language_code, + voice_name=args.voice_name, + voice_variant=args.voice_variant, + ) + sys.exit(0) - if args.lexicon_to_get != "": - lexicons.get_lexicon(args) - return + if args.list_recordings: + check_voice_parameters(args.voice_name, args.language_code) + recordings.list_recordings( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + language_code=args.language_code, + voice_name=args.voice_name, + voice_variant=args.voice_variant, + ) + sys.exit(0) - if args.put_lexicon != None: - lexicons.put_lexicon(args) - return + if args.get_recording is not None: + check_voice_parameters(args.voice_name, args.language_code) + recordings.get_recording( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + language_code=args.language_code, + voice_name=args.voice_name, + voice_variant=args.voice_variant, + recording_key=args.get_recording[0], + output_path=args.get_recording[1], + ) + sys.exit(0) - if args.lexicon_to_delete != "": - lexicons.delete_lexicon(args) - return + if args.put_recording is not None: + check_voice_parameters(args.voice_name, args.language_code) + 
recordings.put_recording( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + language_code=args.language_code, + voice_name=args.voice_name, + voice_variant=args.voice_variant, + recording_key=args.put_recording[0], + audio_path=args.put_recording[1], + ) + sys.exit(0) - if args.voice_to_list_recordings_for != "": - recordings.list_recordings(args) - return + if args.delete_recording_key is not None: + check_voice_parameters(args.voice_name, args.language_code) + recordings.delete_recording( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + language_code=args.language_code, + voice_name=args.voice_name, + voice_variant=args.voice_variant, + recording_key=args.delete_recording_key, + ) + sys.exit(0) - if args.get_recording != None: - recordings.get_recording(args) - return + if args.list_lexicons: + lexicons.list_lexicons( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + language_code=args.language_code, + ) + sys.exit(0) + + if args.get_lexicon is not None: + lexicons.get_lexicon( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + lexicon_uri=args.get_lexicon[0], + 
output_path=args.get_lexicon[1], + ) + sys.exit(0) - if args.put_recording != None: - recordings.put_recording(args) - return + if args.put_lexicon is not None: + lexicons.put_lexicon( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + lexicon_uri=args.put_lexicon[0], + lexicon_path=args.put_lexicon[1], + outside_lookup_behaviour=args.put_lexicon[2], + ) + sys.exit(0) - if args.delete_recording != None: - recordings.delete_recording(args) - return + if args.lexicon_to_delete != "": + lexicons.delete_lexicon( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + lexicon_uri=args.lexicon_to_delete, + ) + sys.exit(0) # Input text determination - input_text = "" - if len(args.inputfile) > 0: + if args.inputfile != "": with codecs.open(args.inputfile, encoding="utf-8", mode="r") as fread: input_text = fread.read() - elif len(args.text) > 0: + elif args.text != "": input_text = args.text else: - raise RuntimeError("Empty input string for synthesis.") + print("Empty input string for synthesis. 
Use --text or --input-path to provide text.") + parser.print_help() + sys.exit(1) - general.synthesize(args, input_text) + general.synthesize( + service_address=args.service_address, + tls=args.tls, + tls_dir=args.tls_dir, + tls_ca_cert_file=args.tls_ca_cert_file, + tls_cert_file=args.tls_cert_file, + tls_private_key_file=args.tls_private_key_file, + session_id=args.session_id, + grpc_timeout=args.grpc_timeout, + audio_encoding=args.audio_encoding, + sampling_rate=args.sampling_rate, + max_frame_size=args.max_frame_size, + language_code=args.language_code, + voice_name=args.voice_name, + voice_age=args.voice_age, + voice_gender=args.voice_gender, + voice_variant=args.voice_variant, + speech_pitch=args.speech_pitch, + speech_range=args.speech_range, + speech_rate=args.speech_rate, + speech_stress=args.speech_stress, + speech_volume=args.speech_volume, + play=args.play, + response=args.response, + out_path=args.out_path, + text=input_text, + ) if __name__ == "__main__": diff --git a/tts_client_python/wave_utils.py b/tts_client_python/wave_utils.py new file mode 100644 index 0000000..ecd40d0 --- /dev/null +++ b/tts_client_python/wave_utils.py @@ -0,0 +1,42 @@ +import struct +from enum import IntEnum + + +class AudioFormat(IntEnum): + PCM16 = 1 + A_LAW = 6 + MU_LAW = 7 + + +def write_wave_file( + filename: str, + data: bytearray, + samplerate: int, + numchannels: int, + sampwidth: int, + audio_format: int, +) -> None: + data_size = len(data) + + with open(filename, "wb") as f: + # header + f.write( + struct.pack( + "<4sL4s4sLHHLLHH4sL", + b"RIFF", + 36 + data_size, + b"WAVE", + b"fmt ", + 16, + audio_format, + numchannels, + samplerate, + int(numchannels * samplerate * sampwidth), + int(numchannels * sampwidth), + sampwidth * 8, + b"data", + data_size, + ) + ) + # data + f.write(data) From 20947aa977b1bfdb938bc80d7e4d36cc8f3c0808 Mon Sep 17 00:00:00 2001 From: YASoftwareDev Date: Thu, 26 Mar 2026 00:42:22 +0100 Subject: [PATCH 2/5] ci: add workflow_dispatch trigger --- 
.github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0a8e14d..db26ca4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,6 +4,7 @@ on: push: branches: ["main", "master"] pull_request: + workflow_dispatch: jobs: test: From d2cbb4d36179a07e565163d79b5a913b72cf717d Mon Sep 17 00:00:00 2001 From: YASoftwareDev Date: Thu, 26 Mar 2026 01:01:17 +0100 Subject: [PATCH 3/5] ci: add setuptools to proto stub venv for GitHub Actions --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index db26ca4..eb4784a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,7 +30,7 @@ jobs: run: | uv venv .venv source .venv/bin/activate - uv pip install --quiet "grpcio-tools>=1.70.0,<1.71.0" + uv pip install --quiet "grpcio-tools>=1.70.0,<1.71.0" setuptools PYTHONPATH=. python setup.py build_grpc - name: Run tests via tox @@ -58,7 +58,7 @@ jobs: run: | uv venv .venv source .venv/bin/activate - uv pip install --quiet "grpcio-tools>=1.70.0,<1.71.0" + uv pip install --quiet "grpcio-tools>=1.70.0,<1.71.0" setuptools PYTHONPATH=. 
python setup.py build_grpc - name: Run tests via tox From 6bc3a0eb1715047b696c3f3031f0d678adf48ca5 Mon Sep 17 00:00:00 2001 From: YASoftwareDev Date: Thu, 26 Mar 2026 01:08:43 +0100 Subject: [PATCH 4/5] chore: update tts-service-api submodule to v3.2.0 (adds OGG_OPUS, A_LAW, MU_LAW) --- submodules/tts-service-api | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/tts-service-api b/submodules/tts-service-api index 80d0a37..b1703e1 160000 --- a/submodules/tts-service-api +++ b/submodules/tts-service-api @@ -1 +1 @@ -Subproject commit 80d0a37bcfd2b1a72c19fb88286601d04eca55bd +Subproject commit b1703e1908d3994df6fd28c6b357d7b1efe381d7 From 06eb461ba3e817b11cafaf88229b93a2a6061cb8 Mon Sep 17 00:00:00 2001 From: YASoftwareDev Date: Thu, 26 Mar 2026 06:19:40 +0100 Subject: [PATCH 5/5] ci: fix uv cache-dependency-glob to use pyproject.toml --- .github/workflows/test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index eb4784a..ebbbb32 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,6 +25,8 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@v5 + with: + cache-dependency-glob: "pyproject.toml" - name: Generate proto stubs run: | @@ -53,6 +55,8 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@v5 + with: + cache-dependency-glob: "pyproject.toml" - name: Generate proto stubs run: |