Python Workflow | Resemble

Upload a folder of recordings and transcripts, then build a production-ready voice.

Prerequisites

Python 3.8+
PRO plan (or higher) account and API token
Folder containing paired .wav and .txt files (clip-01.wav + clip-01.txt, etc.)

$ python3 -m venv venv
$ source venv/bin/activate
$ pip install resemble

1. Script scaffold

1 # resemble-clone-voice-recording/main.py
2 import argparse
3 import os
4 from resemble import Resemble
5 
6 
def initialize() -> None:
    """Configure the Resemble SDK with the key held in ``RESEMBLE_API_KEY``.

    Raises:
        EnvironmentError: If the variable is unset or empty.
    """
    token = os.getenv("RESEMBLE_API_KEY")
    if token:
        Resemble.api_key(token)
        return
    # Fail before any API call is attempted when no credential is available.
    raise EnvironmentError("Set RESEMBLE_API_KEY before running.")
12 
13 
def parse_args():
    """Parse the command line and return the resulting namespace.

    Both ``--name`` and ``--recordings`` are required; argparse exits with a
    usage message when either one is missing.
    """
    cli = argparse.ArgumentParser(description="Create a voice from local recordings")
    for flag in ("--name", "--recordings"):
        cli.add_argument(flag, required=True)
    return cli.parse_args()

2. Create the voice

def create_voice(name: str) -> str:
    """Create a new voice called *name* and return its UUID.

    Prints the UUID and status taken from the response.

    Raises:
        RuntimeError: If the response's ``success`` entry is falsy; the whole
            response is attached as the exception argument.
    """
    print(f"Creating voice {name}...")
    result = Resemble.v2.voices.create(name=name)
    if result["success"]:
        created = result["item"]
        print(f"Voice UUID: {created['uuid']} (status: {created['status']})")
        return created["uuid"]
    raise RuntimeError(result)

3. Prepare recordings

1 import os
2 
def read_folder(folder_path: str):
    """Collect every ``.wav`` clip in *folder_path* that has a transcript.

    A clip ``<stem>.wav`` is paired with ``<stem>.txt`` in the same folder.
    Clips without a transcript are reported on stdout and skipped.

    Args:
        folder_path: Directory holding the paired ``.wav`` / ``.txt`` files.

    Returns:
        A list of dicts, ordered by filename, each with the keys
        ``file_path`` (path to the audio file), ``name`` (the transcript's
        filename) and ``text`` (the transcript's full contents).

    Raises:
        OSError: If *folder_path* cannot be listed or a transcript cannot
            be read.
    """
    audio_suffix = ".wav"
    entries = []
    # os.listdir() returns names in arbitrary order; sort so that the
    # upload order (and the log output) is reproducible between runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(audio_suffix):
            continue

        # Swap only the trailing extension. str.replace() would also rewrite
        # ".wav" occurring earlier in the name (e.g. "a.wav.take.wav") and
        # then look for a transcript that does not exist.
        transcript = filename[: -len(audio_suffix)] + ".txt"
        transcript_path = os.path.join(folder_path, transcript)
        if not os.path.exists(transcript_path):
            print(f"Skipping {filename}; missing transcript.")
            continue

        with open(transcript_path, "r", encoding="utf-8") as handle:
            text = handle.read()

        entries.append(
            {
                "file_path": os.path.join(folder_path, filename),
                "name": transcript,
                "text": text,
            }
        )
    return entries

Aim for at least 20 clean samples, each 1–12 seconds long with no silence. Clips longer than 12 seconds are ignored during training.

4. Upload recordings

def upload_recordings(voice_uuid: str, folder_path: str) -> None:
    """Upload every clip found in *folder_path* to the voice *voice_uuid*.

    Clips come from ``read_folder``. Each one is sent as an active recording
    with the ``neutral`` emotion. A failed upload is printed together with
    its response and does not stop the remaining uploads; a final summary
    line reports how many succeeded.
    """
    clips = read_folder(folder_path)
    uploaded = 0

    for clip in clips:
        clip_name = clip["name"]
        print(f"Uploading {clip_name}...")
        # Keep the audio file open only for the duration of the request.
        with open(clip["file_path"], "rb") as stream:
            result = Resemble.v2.recordings.create(
                voice_uuid,
                stream,
                clip_name,
                clip["text"],
                is_active=True,
                emotion="neutral",
            )

        if not result["success"]:
            print(f"Failed to upload {clip_name}")
            print(result)
            continue
        uploaded += 1

    print(f"Uploaded {uploaded}/{len(clips)} recordings")

5. Trigger the build

def trigger_voice_build(voice_uuid: str) -> None:
    """Submit a build request for the voice identified by *voice_uuid*.

    Raises:
        RuntimeError: If the response's ``success`` entry is falsy; the whole
            response is attached as the exception argument.
    """
    outcome = Resemble.v2.voices.build(uuid=voice_uuid)
    if outcome["success"]:
        print("Build request submitted. Monitor progress via the API or dashboard.")
        return
    raise RuntimeError(outcome)

6. Wire everything together

def main():
    """Run the workflow: create the voice, upload the clips, request a build."""
    # Parse first so a usage error is reported before the API key is checked.
    cli_args = parse_args()
    initialize()

    new_voice = create_voice(cli_args.name)
    upload_recordings(new_voice, cli_args.recordings)
    trigger_voice_build(new_voice)


# Only run the workflow when executed as a script, not when imported.
if __name__ == "__main__":
    main()

$ RESEMBLE_API_KEY=... python main.py --name "Support Voice" --recordings ./example-data

The script prints upload progress and starts training. Use List Voices or the dashboard to monitor build status until the voice is ready.