Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions compose.rocm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
name: fish-speech-rocm

# AMD ROCm compose for Fish Speech (RDNA3 / RDNA4).
# Place your checkpoints in ./checkpoints before running; that directory is
# bind-mounted into the container at /app/checkpoints.
#
# docker compose -f compose.rocm.yml --profile webui up --build
# docker compose -f compose.rocm.yml --profile server up --build

services:
  # Gradio WebUI. Only started when the "webui" profile is selected.
  webui:
    image: fish-speech-webui:rocm
    build:
      context: .
      dockerfile: docker/Dockerfile.rocm
      target: webui
    profiles:
      - webui
    ports:
      # Host port is overridable through GRADIO_PORT; the container side is 7860.
      - "${GRADIO_PORT:-7860}:7860"
    volumes:
      - ./checkpoints:/app/checkpoints
      - ./references:/app/references
    environment:
      ROCBLAS_USE_HIPBLASLT: "0"
      # Forwarded from the host environment; falls back to 1 when unset or empty.
      COMPILE: "${COMPILE:-1}"
    # ROCm device nodes handed to the container.
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video
      - render
    shm_size: "16g"
    tty: true
    stdin_open: true

  # API server. Only started when the "server" profile is selected.
  server:
    image: fish-speech-server:rocm
    build:
      context: .
      dockerfile: docker/Dockerfile.rocm
      target: server
    profiles:
      - server
    ports:
      # Host port is overridable through API_PORT; the container side is 8080.
      - "${API_PORT:-8080}:8080"
    volumes:
      - ./checkpoints:/app/checkpoints
      - ./references:/app/references
    environment:
      ROCBLAS_USE_HIPBLASLT: "0"
      # Forwarded from the host environment; falls back to 1 when unset or empty.
      COMPILE: "${COMPILE:-1}"
    # ROCm device nodes handed to the container.
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video
      - render
    shm_size: "16g"
    tty: true
    stdin_open: true
106 changes: 106 additions & 0 deletions docker/Dockerfile.rocm
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# docker/Dockerfile.rocm
#
# Fish Speech on AMD ROCm (RDNA3 / RDNA4).
# The checkpoints are NOT bundled — mount them at /app/checkpoints.
#
# Build:
# docker build -f docker/Dockerfile.rocm --target webui -t fish-speech-webui:rocm .
# docker build -f docker/Dockerfile.rocm --target server -t fish-speech-server:rocm .
#
# Run (webui):
# docker run --device=/dev/kfd --device=/dev/dri \
# --group-add video --group-add render \
# -e ROCBLAS_USE_HIPBLASLT=0 \
# -v ./checkpoints:/app/checkpoints \
# -p 7860:7860 fish-speech-webui:rocm

# Base image selection. Override ROCM_VERSION to pick another tag of the same
# family, or BASE_IMAGE to replace the image reference entirely.
ARG ROCM_VERSION=7.2.3
ARG BASE_IMAGE=rocm/pytorch:rocm${ROCM_VERSION}_ubuntu24.04_py3.12_pytorch_release_2.9.1

# Shared stage: system packages, Python dependencies and the fish-speech
# sources. The webui and server stages both build on top of it.
FROM ${BASE_IMAGE} AS app-base

ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    ROCBLAS_USE_HIPBLASLT=0

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git ffmpeg libsox-dev build-essential cmake \
        libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install runtime dependencies WITHOUT torch/torchaudio — the ROCm base image
# already ships a gfx-tuned torch (2.9.1+rocm7.2.3).
#
# This layer deliberately runs BEFORE the sources are copied: it does not read
# anything from the repository, so editing project files no longer invalidates
# the (large, slow) dependency layer in the build cache.
#
# descript-audiotools pins protobuf<3.20, but fish-speech's generated proto
# code needs >=3.20. The last step overrides it after the dependency install
# (mirrors pyproject's uv override).
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir \
        numpy "transformers<=4.57.3" datasets lightning pytorch_lightning \
        hydra-core natsort einops librosa rich "gradio>5.0.0" wandb grpcio kui \
        uvicorn loguru loralib pyrootutils resampy "einx[torch]==0.2.2" zstandard \
        pydub "modelscope==1.17.1" "opencc-python-reimplemented==0.1.7" \
        silero-vad ormsgpack tiktoken "pydantic==2.9.2" cachetools \
        descript-audio-codec safetensors soundfile vector_quantize_pytorch \
    && pip install --no-cache-dir --no-build-isolation pyaudio \
    && pip install --no-cache-dir --no-deps --upgrade "protobuf>=4.25,<6.0"

COPY . /app

# Install the package itself with --no-deps so pip does not try to pull a
# CUDA/CPU torch over the one provided by the base image.
RUN pip install --no-cache-dir --no-deps -e .

EXPOSE 7860 8080

# torch.compile is enabled by default (verified working on gfx1201/RDNA4).
# Set COMPILE=0 to disable.
ENV COMPILE=1

##############################################################
# Gradio WebUI
##############################################################
FROM app-base AS webui

# Bind address and port exported into the image environment; both can be
# changed at build time with --build-arg.
# NOTE(review): presumably picked up by Gradio at launch — confirm that
# tools/run_webui.py does not override them.
ARG GRADIO_SERVER_PORT=7860
ARG GRADIO_SERVER_NAME="0.0.0.0"
ENV GRADIO_SERVER_NAME=${GRADIO_SERVER_NAME}
ENV GRADIO_SERVER_PORT=${GRADIO_SERVER_PORT}

# Generate the launcher script. printf '%s\n' writes each quoted argument as one
# line; the single quotes keep ${...} and the trailing backslashes literal, so
# they are evaluated when the container starts, not while the image is built.
# The script appends --compile only when COMPILE is "1" or "true".
RUN printf '%s\n' \
        '#!/bin/bash' \
        'set -e' \
        'ARGS=()' \
        'if [ "${COMPILE:-0}" = "1" ] || [ "${COMPILE:-}" = "true" ]; then ARGS+=(--compile); fi' \
        'exec python tools/run_webui.py \' \
        ' --llama-checkpoint-path checkpoints/s2-pro \' \
        ' --decoder-checkpoint-path checkpoints/s2-pro/codec.pth \' \
        ' --decoder-config-name modded_dac_vq "${ARGS[@]}"' \
        > /app/start_webui.sh \
    && chmod +x /app/start_webui.sh

ENTRYPOINT ["/app/start_webui.sh"]

##############################################################
# API Server
##############################################################
FROM app-base AS server

# Bind address and port of the API server. They are exported into the image
# environment and consumed by the launcher script below, so they can be set at
# build time (--build-arg) or overridden at run time (-e API_SERVER_PORT=...).
# The defaults match EXPOSE 8080 in the base stage; when changing the port,
# publish the matching container port as well.
ARG API_SERVER_NAME="0.0.0.0"
ARG API_SERVER_PORT=8080
ENV API_SERVER_NAME=${API_SERVER_NAME} \
    API_SERVER_PORT=${API_SERVER_PORT}

# Generate the launcher script. printf '%s\n' writes each quoted argument as one
# line; the single quotes keep ${...} and the trailing backslashes literal, so
# they are evaluated when the container starts, not while the image is built.
# --listen is built from API_SERVER_NAME / API_SERVER_PORT (previously the
# address was hard-coded and the two variables were ignored); the fallbacks
# keep the old 0.0.0.0:8080 behaviour when either variable is empty.
# The script appends --compile only when COMPILE is "1" or "true".
RUN printf '%s\n' \
        '#!/bin/bash' \
        'set -e' \
        'ARGS=()' \
        'if [ "${COMPILE:-0}" = "1" ] || [ "${COMPILE:-}" = "true" ]; then ARGS+=(--compile); fi' \
        'exec python tools/api_server.py \' \
        ' --listen "${API_SERVER_NAME:-0.0.0.0}:${API_SERVER_PORT:-8080}" \' \
        ' --llama-checkpoint-path checkpoints/s2-pro \' \
        ' --decoder-checkpoint-path checkpoints/s2-pro/codec.pth \' \
        ' --decoder-config-name modded_dac_vq "${ARGS[@]}"' \
        > /app/start_server.sh && chmod +x /app/start_server.sh

ENTRYPOINT ["/app/start_server.sh"]
37 changes: 37 additions & 0 deletions docs/en/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,40 @@ Both methods require mounting these directories:

!!! warning
GPU support requires NVIDIA Docker runtime. For CPU-only deployment, remove the `--gpus all` flag and use CPU images.

### AMD ROCm support

Fish Speech runs on AMD GPUs via ROCm. The ROCm image is based on the official `rocm/pytorch` image, which already ships a gfx-tuned PyTorch, so no separate torch install is needed. Verified on RDNA4 (Radeon AI PRO R9700 / gfx1201) with ROCm 7.2.3; RDNA3 (gfx1100/gfx1101) should also work.

**Prerequisites:**

- AMD GPU with ROCm support (RDNA3 / RDNA4)
- ROCm drivers installed on the host
- Docker, with the host's `/dev/kfd` and `/dev/dri` devices passed through to the container

**Using Docker Compose:**

```bash
# WebUI
docker compose -f compose.rocm.yml --profile webui up --build

# API server
docker compose -f compose.rocm.yml --profile server up --build
```

**Manual build and run:**

```bash
docker build -f docker/Dockerfile.rocm --target webui -t fish-speech-webui:rocm .

docker run \
--device=/dev/kfd --device=/dev/dri \
--group-add video --group-add render \
-e ROCBLAS_USE_HIPBLASLT=0 \
-v ./checkpoints:/app/checkpoints \
-p 7860:7860 \
fish-speech-webui:rocm
```

!!! note
`ROCBLAS_USE_HIPBLASLT=0` is set by default for RDNA4 (gfx1201) stability; RDNA3 users may not need it. Fish Speech uses `scaled_dot_product_attention`, which dispatches to ROCm's AOTriton flash-attention backend automatically — no custom kernel build is required. The first run is slower while MIOpen auto-tunes kernels. `torch.compile` is enabled by default (`COMPILE=1`); set `COMPILE=0` to disable.