C/C++ Windows SAPI实现文字转语音功能_F11 - 专业站长和开发者的学习网站

本文通过封装Windows SAPI（Speech Application Programming Interface），提供了一个现代化的C++接口实现文字转语音功能。主要特性包括支持同步/异步语音合成、可调节语速（-10到10）和音量控制（0-100%），同时支持将合成语音保存为WAV文件，并自动处理特殊字符转义。每次合成都在独立的PowerShell进程中执行，因此可以并发发起多次朗读；但set_rate()/set_volume()本身没有加锁，多线程同时修改参数时需由调用方自行同步。该接口依赖于Windows系统（需.NET Framework支持）、PowerShell 5.1及以上版本，以及C++11或更高版本。完整代码在文章末尾提供。

快速开始

基础使用示例

#include "tts.hpp"

#include <iostream>
#include <string>

// Quick-start example: configure the voice, speak synchronously and
// asynchronously, then render a sentence to a WAV file.
// Every call that reports a status is checked so a failure is not silently lost.
int main() {
    TTS::TextToSpeech tts;

    // Configure the voice parameters.
    tts.set_rate(5);     // speak faster
    tts.set_volume(80);  // 80% volume

    // Synchronous speech: blocks until done, false means synthesis failed.
    if (!tts.speak_sync("Hello, welcome to the text-to-speech system.")) {
        std::cerr << "Speech synthesis failed" << std::endl;
        return 1;
    }

    // Asynchronous speech: get() waits for completion and yields the status.
    auto future = tts.speak_async("This is an async operation.");
    if (!future.get()) {
        std::cerr << "Speech synthesis failed" << std::endl;
        return 1;
    }

    // Save to a file: an empty path means the WAV file could not be produced.
    const std::string filename = tts.save_to_wav("Audio saved to file.");
    if (filename.empty()) {
        std::cerr << "Speech synthesis failed" << std::endl;
        return 1;
    }
    std::cout << "Saved to " << filename << std::endl;

    return 0;
}

核心功能详解

语音参数设置

语速控制 (set_rate())

void set_rate(int rate); // 范围：-10 ~ 10

正值加快语速
负值减慢语速
自动钳制在有效范围内

音量控制 (set_volume())

void set_volume(int volume); // 范围：0 ~ 100

0表示静音
100表示最大音量
支持百分比精确控制

同步朗读 (speak_sync())

bool speak_sync(const std::string& text);

阻塞当前线程直到朗读完成
返回执行状态（true表示成功）
适合需要顺序执行的场景

示例：

// speak_sync() returns false when the synthesis did not succeed.
if (!tts.speak_sync("Critical system alert!")) {

// Handle the error here

}

异步朗读 (speak_async())

std::future<bool> speak_async(const std::string& text);

立即返回std::future对象
支持多种等待方式：

// Start speaking in the background; the returned future carries the status.
auto future = tts.speak_async("Processing completed");

// Option 1: block until the task has finished.
future.wait();

// Option 2: poll periodically. The duration is spelled out explicitly so the
// snippet does not depend on `using namespace std::chrono_literals` (100ms).
while (future.wait_for(std::chrono::milliseconds(100)) != std::future_status::ready) {
    // Do other work here
}

// Fetch the result (true means the synthesis succeeded).
bool success = future.get();

保存音频文件 (save_to_wav())

std::string save_to_wav(const std::string& text, const std::string& filename = "");

自动生成临时文件（当filename为空时）
返回最终文件路径
文件保存位置规则：
- 指定filename：使用完整路径
- 未指定：生成随机文件名（系统临时目录）

示例：

// 自动生成临时文件

auto auto_file = tts.save_to_wav("Automatic filename");

// Use a caller-supplied path (raw string literal keeps the backslashes as-is)

std::string custom_path = R"(C:\audio\alert.wav)";

auto custom_file = tts.save_to_wav("Custom path", custom_path);

高级用法

批量语音生成

std::vector<std::future<bool>> batch_process() {

TTS::TextToSpeech tts;

std::vector<std::future<bool>> results;

for (int i = 0; i < 10; ++i) {

std::string text = "Message " + std::to_string(i);

results.push_back(tts.speak_async(text));

}

return results;

}

实时进度跟踪

// Starts an asynchronous speech task and reports its progress from a detached
// monitoring thread.
//
// The future is moved into the thread as an argument. Capturing the local
// future by reference would leave the detached thread with a dangling
// reference as soon as this function returns.
// NOTE(review): `tts` is not declared in this snippet; it is presumably a
// longer-lived instance defined by the surrounding program - confirm.
void monitor_async() {
    auto future = tts.speak_async("Long running operation");

    std::thread monitor(
        [](std::future<bool> pending) {
            // Print a heartbeat once per second until the result is ready.
            while (pending.wait_for(std::chrono::seconds(1)) != std::future_status::ready) {
                std::cout << "Synthesizing..." << std::endl;
            }

            std::cout << "Completed with status: " << pending.get() << std::endl;
        },
        std::move(future));

    monitor.detach();
}

注意事项与最佳实践

字符处理

自动转义XML特殊字符：&, <, >, ", '
支持多语言文本（需系统语音包支持）
建议预处理用户输入：

// Returns a copy of `raw` with ASCII control characters removed.
//
// Bytes >= 0x80 are kept untouched so UTF-8 encoded (e.g. Chinese) text
// survives. Each byte is converted to unsigned char before it is classified,
// because passing a negative char value to std::isprint is undefined behaviour.
std::string sanitize_input(const std::string& raw) {
    std::string filtered;
    filtered.reserve(raw.size());

    std::copy_if(raw.begin(), raw.end(), std::back_inserter(filtered),
                 [](char c) {
                     const unsigned char byte = static_cast<unsigned char>(c);
                     return byte >= 0x80 || std::isprint(byte) != 0;
                 });

    return filtered;
}

性能优化

复用TextToSpeech实例（避免重复初始化）
异步操作时注意生命周期管理：

// 错误示例（对象提前销毁）：

auto future = TTS::TextToSpeech().speak_async("text");

// Correct: keep the instance alive (here via shared_ptr) until the task is done.

auto tts = std::make_shared<TTS::TextToSpeech>();

auto future = tts->speak_async("text");

错误处理

检查返回值：

// speak_sync() returns false on failure; report it on the error stream.
if (!tts.speak_sync("text")) {

std::cerr << "Speech synthesis failed" << std::endl;

}

常见错误原因：

PowerShell访问权限不足
无效的文件路径
系统语音引擎故障

常见问题解答

Q：支持哪些音频格式？
A：目前仅支持WAV格式，由系统API决定

Q：如何处理中文字符？
A：需确保：

系统已安装中文语音包
代码文件使用UTF-8编码
控制台支持Unicode（建议使用chcp 65001）

Q：为什么需要生成批处理文件？
A：为了解决：

PowerShell直接执行的编码问题
长命令行参数限制
错误代码捕获需求

Q：最大支持文本长度？
A：由系统限制决定。合成命令会作为一整行写入批处理文件，而cmd.exe单行命令的长度上限约为8191个字符，因此较长的文本建议按句子或段落拆分后逐段朗读

Q：如何实现语音中断？
A：当前版本未实现。注意：销毁TextToSpeech对象并不会终止已经启动的PowerShell进程，异步朗读仍会继续到结束；如需中断，需要自行扩展实现（例如保存并结束对应的子进程）

TTS.hpp 源代码

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

#pragma once

#include <string>

#include <sstream>

#include <cstdlib>

#include <random>

#include <atomic>

#include <thread>

#include <memory>

#include <system_error>

#include <future>

#include <fstream>

#include <cstdio>

#ifdef _WIN32

#include <io.h>

#else

#include <unistd.h>

#endif

namespace TTS {

/// Text-to-speech helper that drives the Windows System.Speech synthesizer.
///
/// Each request is turned into a PowerShell one-liner, written into a
/// temporary batch file and executed through std::system(). The batch file is
/// removed again once the command has finished.
///
/// set_rate()/set_volume() are plain, unsynchronised writes; callers that
/// change the settings from several threads must synchronise themselves.
class TextToSpeech {
public:
    static constexpr int MIN_RATE = -10;
    static constexpr int MAX_RATE = 10;
    static constexpr int MIN_VOLUME = 0;
    static constexpr int MAX_VOLUME = 100;

    explicit TextToSpeech() = default;

    /// Sets the speaking rate; values outside [MIN_RATE, MAX_RATE] are clamped.
    void set_rate(int rate) {
        rate_ = clamp(rate, MIN_RATE, MAX_RATE);
    }

    /// Sets the volume; values outside [MIN_VOLUME, MAX_VOLUME] are clamped.
    void set_volume(int volume) {
        volume_ = clamp(volume, MIN_VOLUME, MAX_VOLUME);
    }

    /// Speaks `text` on the default audio device and blocks until done.
    /// @return true when the generated command exited with status 0.
    bool speak_sync(const std::string& text) {
        return execute_command(generate_ps_command(text));
    }

    /// Speaks `text` on a background task and returns immediately.
    ///
    /// The command is built up front from the current rate/volume and the task
    /// captures only that string, so it never touches `this`. The object may
    /// therefore be destroyed while the task is still running.
    /// @return future that yields the same status as speak_sync().
    std::future<bool> speak_async(const std::string& text) {
        const std::string command = generate_ps_command(text);
        return std::async(std::launch::async,
                          [command] { return execute_command(command); });
    }

    /// Renders `text` into a WAV file.
    ///
    /// @param filename Target path, used exactly as given. When empty, a
    ///                 random "tts_<hex>.wav" name in the temporary directory
    ///                 is generated.
    /// @return the path of the written file, or "" when synthesis failed.
    std::string save_to_wav(const std::string& text, const std::string& filename = "") {
        const bool generated = filename.empty();
        const std::string full_path =
            generated ? generate_temp_path("tts_", ".wav") : filename;

        if (!execute_command(generate_ps_command(text, full_path))) {
            // Only files this class named itself are removed on failure;
            // a caller-supplied path is left alone.
            if (generated) std::remove(full_path.c_str());
            return "";
        }
        return full_path;
    }

private:
    int rate_ = 0;       // default speaking rate
    int volume_ = 100;   // default volume

    // Not read or written by any member of this class at present.
    std::atomic<bool> cancel_flag_{false};

    /// Builds the PowerShell command line for speaking `text`, optionally
    /// redirecting the output into `output_file` instead of the audio device.
    std::string generate_ps_command(const std::string& text,
                                    const std::string& output_file = "") const {
        std::ostringstream oss;
        oss << "powershell -Command \"";
        oss << "Add-Type -AssemblyName System.Speech; ";
        oss << "$speech = New-Object System.Speech.Synthesis.SpeechSynthesizer; ";
        oss << "$speech.Rate = " << rate_ << "; ";
        oss << "$speech.Volume = " << volume_ << "; ";

        if (!output_file.empty()) {
            // The path sits inside a single-quoted PowerShell string, so any
            // apostrophe in it has to be doubled.
            oss << "$speech.SetOutputToWaveFile('" << escape_ps_string(output_file) << "'); ";
        } else {
            oss << "$speech.SetOutputToDefaultAudioDevice(); ";
        }

        // Line breaks would split the command across several batch lines,
        // so they are flattened to spaces before escaping.
        oss << "$speech.Speak([System.Xml.XmlConvert]::VerifyXmlChars('"
            << escape_ps_string(escape_xml(flatten_line_breaks(text))) << "')); ";

        // Dispose releases the synthesizer and its reference to the output file.
        oss << "$speech.Dispose();\"";
        return oss.str();
    }

    /// Replaces CR and LF with spaces so the text stays on one command line.
    static std::string flatten_line_breaks(const std::string& text) {
        std::string result = text;
        for (char& c : result) {
            if (c == '\r' || c == '\n') c = ' ';
        }
        return result;
    }

    /// Escapes `text` for use inside a single-quoted PowerShell string
    /// by doubling every apostrophe.
    static std::string escape_ps_string(const std::string& text) {
        std::string result;
        result.reserve(text.size() * 2);
        for (char c : text) {
            if (c == '\'') {
                result += "''";
            } else {
                result += c;
            }
        }
        return result;
    }

    /// Doubles every '%' so the batch interpreter does not treat it as the
    /// start of a variable expansion.
    static std::string escape_batch_percent(const std::string& line) {
        std::string result;
        result.reserve(line.size() + 8);
        for (char c : line) {
            result += c;
            if (c == '%') result += '%';
        }
        return result;
    }

    /// Writes `command` into a temporary batch file, runs it and removes it.
    /// @return true when the batch file could be written and exited with 0.
    static bool execute_command(const std::string& command) {
        const std::string bat_path = generate_temp_path("tts_", ".bat");

        std::ofstream bat_file(bat_path);
        if (!bat_file) return false;

        bat_file << "@echo off\n"
                 << "chcp 65001 > nul\n"          // switch the console to UTF-8
                 << escape_batch_percent(command) << "\n"
                 << "exit /b %ERRORLEVEL%";       // propagate the command's status
        bat_file.close();
        if (!bat_file) {
            // The script was not written completely; do not run it.
            std::remove(bat_path.c_str());
            return false;
        }

        const std::string cmd = "cmd /c \"" + bat_path + "\"";
        const int result = std::system(cmd.c_str());

        std::remove(bat_path.c_str());
        return result == 0;
    }

    /// Returns "<temp dir>\<prefix><8 random hex digits><extension>" for a
    /// file that does not exist at the time of the check.
    ///
    /// The generator is thread_local because this function is reached from
    /// the background tasks started by speak_async().
    static std::string generate_temp_path(const std::string& prefix = "tts_",
                                          const std::string& extension = "") {
        static const char kHexDigits[] = "0123456789abcdef";
        thread_local std::mt19937 gen{std::random_device{}()};
        std::uniform_int_distribution<int> dis(0, 15);

        const std::string temp_dir = get_temp_directory();
        std::string full_path;
        do {
            std::string unique_part;
            for (int i = 0; i < 8; ++i) {
                unique_part += kHexDigits[dis(gen)];
            }
            full_path = temp_dir + "\\" + prefix + unique_part + extension;
        } while (file_exists(full_path));

        return full_path;
    }

    /// Replaces the five XML special characters with their predefined entities.
    static std::string escape_xml(const std::string& data) {
        std::string buffer;
        buffer.reserve(data.size());
        for (char c : data) {
            switch (c) {
                case '&':  buffer += "&amp;";  break;
                case '\"': buffer += "&quot;"; break;
                case '\'': buffer += "&apos;"; break;
                case '<':  buffer += "&lt;";   break;
                case '>':  buffer += "&gt;";   break;
                default:   buffer += c;        break;
            }
        }
        return buffer;
    }

    /// Limits `value` to [lo, hi]. The bounds are deliberately not named
    /// min/max, which are macros when <windows.h> is included without NOMINMAX.
    template <typename T>
    static T clamp(T value, T lo, T hi) {
        return (value < lo) ? lo : (value > hi) ? hi : value;
    }

    /// Returns %TEMP%, else %TMP%, else the current directory (".").
    static std::string get_temp_directory() {
        const char* tmp = std::getenv("TEMP");
        if (!tmp) tmp = std::getenv("TMP");
        return tmp ? tmp : ".";
    }

    /// Returns true when `path` exists.
    static bool file_exists(const std::string& path) {
#ifdef _WIN32
        return ::_access(path.c_str(), 0) == 0;
#else
        return ::access(path.c_str(), F_OK) == 0;
#endif
    }
};

} // namespace TTS