Skip to main content

Prerequisites

Sign up for a free Fish Audio account to get started with our API.
  1. Go to fish.audio/auth/signup
  2. Fill in your details to create an account, then complete the steps to verify it.
  3. Log in to your account and navigate to the API section
Once you have an account, you’ll need an API key to authenticate your requests.
  1. Log in to your Fish Audio Dashboard
  2. Navigate to the API Keys section
  3. Click “Create New Key”, give it a descriptive name, and set an expiration if desired
  4. Copy your key and store it securely
Keep your API key secret! Never commit it to version control or share it publicly.

Instant Voice Cloning

Clone a voice on-the-fly without creating a persistent model using ReferenceAudio:
from fishaudio import FishAudio
from fishaudio.types import ReferenceAudio
from fishaudio.utils import play

client = FishAudio()

# Load the reference recording into memory; the file only needs to stay
# open for the read itself.
with open("reference_voice.wav", "rb") as ref_file:
    ref_bytes = ref_file.read()

# Pair the recording with the transcript of what is spoken in it
reference = ReferenceAudio(
    audio=ref_bytes,
    text="Text spoken in the reference audio",
)

# Clone from reference audio and play the result
audio = client.tts.convert(
    text="This will sound like the reference voice",
    references=[reference],
)
play(audio)
Instant voice cloning is perfect for one-time use cases. For repeated use of the same voice, create a persistent voice model instead.

Multiple Reference Samples

Improve voice quality by providing multiple reference samples:
from fishaudio import FishAudio
from fishaudio.types import ReferenceAudio
from fishaudio.utils import play

client = FishAudio()

# Each entry pairs a recording with the transcript of what it says
sample_specs = [
    ("sample1.wav", "First sample transcript"),
    ("sample2.wav", "Second sample transcript"),
    ("sample3.wav", "Third sample transcript"),
]


def load_reference(path, transcript):
    """Read one recording from disk and wrap it with its transcript."""
    with open(path, "rb") as sample_file:
        return ReferenceAudio(audio=sample_file.read(), text=transcript)


# Load multiple reference samples, one ReferenceAudio per entry
references = [
    load_reference(path, transcript)
    for path, transcript in sample_specs
]

# Generate with multiple references
audio = client.tts.convert(
    text="This voice is trained on multiple samples",
    references=references,
)
play(audio)

Creating Persistent Voice Models

Create a reusable voice model for consistent voice characteristics using voices.create():
from fishaudio import FishAudio

client = FishAudio()

# Prepare voice samples: read each recording's raw bytes, in order
voice_samples = []
for sample_path in ("voice1.wav", "voice2.wav"):
    with open(sample_path, "rb") as sample_file:
        voice_samples.append(sample_file.read())

# Create voice model from the collected samples
voice = client.voices.create(
    title="My Custom Voice",
    voices=voice_samples,
    description="A custom voice for my project",
    tags=["custom", "english"],
    visibility="private",
)

# The returned object carries the new model's ID
print(f"Created voice: {voice.id}")

With Transcripts

Providing transcripts is faster and more accurate than automatic transcription. When you provide transcripts, the system skips running ASR (speech recognition), resulting in better performance and quality:
from fishaudio import FishAudio

client = FishAudio()

# Voice samples with transcripts: (recording path, spoken text) pairs
labelled_samples = [
    ("voice1.wav", "This is the first sample"),
    ("voice2.wav", "This is the second sample"),
    ("voice3.wav", "This is the third sample"),
]

# Read the raw bytes of every recording, keeping the original order
recordings = []
for sample_path, _ in labelled_samples:
    with open(sample_path, "rb") as sample_file:
        recordings.append(sample_file.read())

# Transcripts line up index-for-index with the recordings above
transcripts = [spoken_text for _, spoken_text in labelled_samples]

# Create voice with transcripts
voice = client.voices.create(
    title="High Quality Voice",
    voices=recordings,
    texts=transcripts,
    description="Voice with accurate transcripts",
    enhance_audio_quality=True,
)

print(f"Created voice: {voice.id}")

Audio Quality Enhancement

Enable automatic audio enhancement to clean up noisy reference audio:
# Assumes `client` and `voice_samples` are already defined, as in the
# "Creating Persistent Voice Models" example.
voice = client.voices.create(
    title="Enhanced Voice",
    voices=voice_samples,
    enhance_audio_quality=True  # Clean up background noise and normalize levels
)
Audio enhancement helps process noisy or lower-quality reference audio. If your audio is already clean and well-recorded, this may not provide additional benefit.

Managing Voice Models

List Voices

Browse available voices, with optional filtering, using voices.list():
from fishaudio import FishAudio

client = FishAudio()

# List all voices
# NOTE(review): page_size presumably caps how many voices are returned per
# page — confirm against the voices.list() reference.
voices = client.voices.list(page_size=20)
# The result exposes a `total` count alongside the returned `items`
print(f"Total voices: {voices.total}")

# Print each returned voice's title next to its ID
for voice in voices.items:
    print(f"{voice.title}: {voice.id}")

Filter by Tags and Language

from fishaudio import FishAudio

client = FishAudio()

# Filter by tags
# NOTE(review): whether multiple tags are combined with AND or OR is not
# shown here — confirm against the voices.list() reference.
male_voices = client.voices.list(
    tags=["male", "english"],
    page_size=10
)

# Filter by language
# "zh" is presumably the language code for Chinese — confirm the accepted codes.
chinese_voices = client.voices.list(
    language="zh",
    page_size=10
)

# Get only your own voices
my_voices = client.voices.list(
    self_only=True,
    page_size=20
)

Get Voice Details

Use voices.get() to retrieve voice details:
from fishaudio import FishAudio

client = FishAudio()

# Get specific voice
# The argument is the voice's ID; this example ID is labelled "Adrian".
voice = client.voices.get("bf322df2096a46f18c579d0baa36f41d")  # Adrian

# Print the metadata fields read from the returned voice object
print(f"Title: {voice.title}")
print(f"Description: {voice.description}")
print(f"Tags: {voice.tags}")
print(f"Languages: {voice.languages}")

Update Voice Metadata

Update voice information using voices.update():
from fishaudio import FishAudio

client = FishAudio()

# Update voice information
# The first positional argument is the ID of the voice to update; the keyword
# arguments carry the new metadata values.
# NOTE: the ID below is an example — substitute the ID of the voice you want
# to modify.
client.voices.update(
    "bf322df2096a46f18c579d0baa36f41d",  # Adrian
    title="Updated Voice Name",
    description="Updated description",
    visibility="public",  # "public", "unlist", or "private"
    tags=["updated", "english", "male"]
)

Delete Voice

Remove voice models using voices.delete():
from fishaudio import FishAudio

client = FishAudio()

# Delete a voice model
# WARNING: deletion is permanent and cannot be undone.
# NOTE: the ID below is an example — substitute the ID of the voice you want
# to remove.
client.voices.delete("bf322df2096a46f18c579d0baa36f41d")  # Adrian
# Reached only if delete() returned without raising
print("Voice deleted successfully")
Deleting a voice is permanent and cannot be undone. Make sure you have backups of any important voice models.

Next Steps