TTA Speech 2.6 HD ASYNC API | MiniMax High-Quality Text-to-Speech

MiniMax Speech-2.6-hd Async Text-to-Speech

curl --request POST \
  --url https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "voice_setting": {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "text_normalization": true
  },
  "audio_setting": {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123
  },
  "pronunciation_dict": {
    "tone": [
      {}
    ]
  },
  "language_boost": "<string>",
  "voice_modify": {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>"
  }
}
'

import requests

url = "https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd"

payload = {
    "text": "<string>",
    "voice_setting": {
        "speed": 123,
        "vol": 123,
        "pitch": 123,
        "voice_id": "<string>",
        "emotion": "<string>",
        "text_normalization": True
    },
    "audio_setting": {
        "sample_rate": 123,
        "bitrate": 123,
        "format": "<string>",
        "channel": 123
    },
    "pronunciation_dict": { "tone": [{}] },
    "language_boost": "<string>",
    "voice_modify": {
        "pitch": 123,
        "intensity": 123,
        "timbre": 123,
        "sound_effects": "<string>"
    }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    voice_setting: {
      speed: 123,
      vol: 123,
      pitch: 123,
      voice_id: '<string>',
      emotion: '<string>',
      text_normalization: true
    },
    audio_setting: {sample_rate: 123, bitrate: 123, format: '<string>', channel: 123},
    pronunciation_dict: {tone: [{}]},
    language_boost: '<string>',
    voice_modify: {pitch: 123, intensity: 123, timbre: 123, sound_effects: '<string>'}
  })
};

fetch('https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'voice_setting' => [
        'speed' => 123,
        'vol' => 123,
        'pitch' => 123,
        'voice_id' => '<string>',
        'emotion' => '<string>',
        'text_normalization' => true
    ],
    'audio_setting' => [
        'sample_rate' => 123,
        'bitrate' => 123,
        'format' => '<string>',
        'channel' => 123
    ],
    'pronunciation_dict' => [
        'tone' => [
                [
                                
                ]
        ]
    ],
    'language_boost' => '<string>',
    'voice_modify' => [
        'pitch' => 123,
        'intensity' => 123,
        'timbre' => 123,
        'sound_effects' => '<string>'
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main submits an asynchronous text-to-speech task to the
// minimax-speech-2.6-hd endpoint and prints the raw response body.
//
// Replace the "<content-type>" and "<authorization>" placeholders (and the
// placeholder values inside the JSON payload) before running.
func main() {
	url := "https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On error res is nil, so it must not be dereferenced.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

HttpResponse<String> response = Unirest.post("https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "task_id": "<string>"
}

POST

async

minimax-speech-2.6-hd

MiniMax Speech-2.6-hd Async Text-to-Speech

curl --request POST \
  --url https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "voice_setting": {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "text_normalization": true
  },
  "audio_setting": {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123
  },
  "pronunciation_dict": {
    "tone": [
      {}
    ]
  },
  "language_boost": "<string>",
  "voice_modify": {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>"
  }
}
'

import requests

url = "https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd"

payload = {
    "text": "<string>",
    "voice_setting": {
        "speed": 123,
        "vol": 123,
        "pitch": 123,
        "voice_id": "<string>",
        "emotion": "<string>",
        "text_normalization": True
    },
    "audio_setting": {
        "sample_rate": 123,
        "bitrate": 123,
        "format": "<string>",
        "channel": 123
    },
    "pronunciation_dict": { "tone": [{}] },
    "language_boost": "<string>",
    "voice_modify": {
        "pitch": 123,
        "intensity": 123,
        "timbre": 123,
        "sound_effects": "<string>"
    }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    voice_setting: {
      speed: 123,
      vol: 123,
      pitch: 123,
      voice_id: '<string>',
      emotion: '<string>',
      text_normalization: true
    },
    audio_setting: {sample_rate: 123, bitrate: 123, format: '<string>', channel: 123},
    pronunciation_dict: {tone: [{}]},
    language_boost: '<string>',
    voice_modify: {pitch: 123, intensity: 123, timbre: 123, sound_effects: '<string>'}
  })
};

fetch('https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'voice_setting' => [
        'speed' => 123,
        'vol' => 123,
        'pitch' => 123,
        'voice_id' => '<string>',
        'emotion' => '<string>',
        'text_normalization' => true
    ],
    'audio_setting' => [
        'sample_rate' => 123,
        'bitrate' => 123,
        'format' => '<string>',
        'channel' => 123
    ],
    'pronunciation_dict' => [
        'tone' => [
                [
                                
                ]
        ]
    ],
    'language_boost' => '<string>',
    'voice_modify' => [
        'pitch' => 123,
        'intensity' => 123,
        'timbre' => 123,
        'sound_effects' => '<string>'
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main submits an asynchronous text-to-speech task to the
// minimax-speech-2.6-hd endpoint and prints the raw response body.
//
// Replace the "<content-type>" and "<authorization>" placeholders (and the
// placeholder values inside the JSON payload) before running.
func main() {
	url := "https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On error res is nil, so it must not be dereferenced.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

HttpResponse<String> response = Unirest.post("https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.myrouter.ai/v3/async/minimax-speech-2.6-hd")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "task_id": "<string>"
}

This API supports asynchronous text-to-speech generation with a maximum of 1 million characters per request. The complete generated audio result can be retrieved asynchronously. Supports 100+ system voices and cloned voices; supports customization of pitch, speed, volume, bitrate, sample rate, and output format. After submitting a long text speech synthesis request, please note that the returned URL is valid for 24 hours from the time it is generated. Please download the content in time.

Suitable for long text speech generation such as entire books. Task queuing may take a long time. For short sentence generation, voice chat, and online social scenarios, it is recommended to use Synchronous Text-to-Speech.

Request Headers

string

required

Enum: application/json

string

required

Bearer authentication format: Bearer {{API Key}}.

Request Body

string

required

The text to be synthesized, limited to a maximum of 50,000 characters.

object

required

Show properties

number

Range [0.5, 2], Default: 1.0. The speech rate of the generated audio. Optional; higher values result in faster speech.

number

Range (0, 10], Default: 1.0. The volume of the generated audio. Optional; higher values result in louder volume.

number

default:0

Range [-12, 12], Default: 0. The pitch of the generated audio. Optional (0 outputs the original voice; value must be an integer).

string

The voice ID for the request. Supports both system voices (ID) and cloned voices (ID). The system voice IDs are as follows:

Youthful Male Voice: male-qn-qingse
Elite Young Male Voice: male-qn-jingying
Assertive Young Male Voice: male-qn-badao
College Male Voice: male-qn-daxuesheng
Young Female Voice: female-shaonv
Mature Female Voice (Confident): female-yujie
Mature Female Voice: female-chengshu
Sweet Female Voice: female-tianmei
Male Presenter: presenter_male
Female Presenter: presenter_female
Male Audiobook 1: audiobook_male_1
Male Audiobook 2: audiobook_male_2
Female Audiobook 1: audiobook_female_1
Female Audiobook 2: audiobook_female_2
Youthful Male Voice-beta: male-qn-qingse-jingpin
Elite Young Male Voice-beta: male-qn-jingying-jingpin
Assertive Young Male Voice-beta: male-qn-badao-jingpin
College Male Voice-beta: male-qn-daxuesheng-jingpin
Young Female Voice-beta: female-shaonv-jingpin
Mature Female Voice (Confident)-beta: female-yujie-jingpin
Mature Female Voice-beta: female-chengshu-jingpin
Sweet Female Voice-beta: female-tianmei-jingpin
Smart Boy: clever_boy
Cute Boy: cute_boy
Adorable Girl: lovely_girl
Cartoon Pig: cartoon_pig
Cute Little Brother: bingjiao_didi
Handsome Boyfriend: junlang_nanyou
Innocent Junior: chunzhen_xuedi
Cool Senior: lengdan_xiongzhang
Assertive Young Master: badao_shaoye
Sweet Ling: tianxin_xiaoling
Playful Girl: qiaopi_mengmei
Charming Lady: wumei_yujie
Cute Junior Girl: diadia_xuemei
Elegant Senior Girl: danya_xuejie
Santa Claus: Santa_Claus
Grinch: Grinch
Rudolph: Rudolph
Arnold: Arnold
Charming Santa: Charming_Santa
Charming Lady: Charming_Lady
Sweet Girl: Sweet_Girl
Cute Elf: Cute_Elf
Attractive Girl: Attractive_Girl
Serene Woman: Serene_Woman

string

Controls the emotion of the synthesized speech. Currently supports 7 emotions: happy, sad, angry, fearful, disgusted, surprised, neutral. Possible values: ["happy", "sad", "angry", "fearful", "disgusted", "surprised", "neutral"]

bool

default:"false"

This parameter enables English text normalization, which can improve performance in number reading scenarios but slightly increases latency. If not provided, defaults to false.

object

Show properties

number

default:32000

Possible values: [8000, 16000, 22050, 24000, 32000, 44100]. The sample rate of the generated audio. Optional, Default: 32000.

number

default:128000

Possible values: [32000, 64000, 128000, 256000]. The bitrate of the generated audio. Optional, Default: 128000. This parameter only applies to mp3 format audio.

string

default:"mp3"

The format of the generated audio. Default: mp3. Options: mp3, pcm, flac, wav. wav is only supported in non-streaming output.

number

default:1

The number of audio channels. Default: 1 (mono). Options: 1: Mono; 2: Stereo.

object

Show properties

list

Replace characters, symbols, and their corresponding pronunciations that require special annotation. Replace pronunciation (adjust tone/replace with other character pronunciation), format as follows: ["omg/oh my god"]. Tones are represented by numbers: 1st tone (high level) is 1, 2nd tone (rising) is 2, 3rd tone (dipping) is 3, 4th tone (falling) is 4, neutral tone is 5.

string

default:"null"

Enhances recognition of specified minority languages and dialects. When set, it can improve speech performance for the specified language/dialect. If the language type is unclear, you can select “auto” and the model will automatically determine the language type. Supported values:

'Chinese', 'Chinese,Yue', 'English', 'Arabic', 'Russian', 'Spanish', 'French', 'Portuguese', 'German', 'Turkish', 'Dutch', 'Ukrainian', 'Vietnamese', 'Indonesian', 'Japanese', 'Italian', 'Korean', 'Thai', 'Polish', 'Romanian', 'Greek', 'Czech', 'Finnish', 'Hindi', 'Bulgarian', 'Danish', 'Hebrew', 'Malay', 'Persian', 'Slovak', 'Swedish', 'Croatian', 'Filipino', 'Hungarian', 'Norwegian', 'Slovenian', 'Catalan', 'Nynorsk', 'Tamil', 'Afrikaans', 'auto'

object

Voice effect settings. Supported audio formats for this parameter: mp3, wav, flac

Show properties

integer

Pitch adjustment (deep/bright), range [-100, 100]. Values closer to -100 produce a deeper sound; values closer to 100 produce a brighter sound.

integer

Intensity adjustment (powerful/soft), range [-100, 100]. Values closer to -100 produce a more powerful sound; values closer to 100 produce a softer sound.

integer

Timbre adjustment (rich/crisp), range [-100, 100]. Values closer to -100 produce a richer sound; values closer to 100 produce a crisper sound.

string

Sound effect settings. Only one can be selected at a time. Possible values:

spacious_echo (Spacious Echo)
auditorium_echo (Auditorium Broadcast)
lofi_telephone (Telephone Distortion)
robotic (Electronic Voice)

Response

string

required

The task_id of the async task. Use the task_id to call the Get Async Task Result API to retrieve the generated result.

MiniMax Speech-2.6-hd Synchronous Text-to-Speech

MiniMax Speech-2.6-turbo Synchronous Text-to-Speech

API Basics

LLM

Image

Video

Audio

MiniMax Speech-2.6-hd Async Text-to-Speech

Request Headers

Request Body

Response

​Request Headers

​Request Body

​Response

Request Headers

Request Body

Response