本文通过封装Windows SAPI(Speech Application Programming Interface),提供了一个现代化的C++接口实现文字转语音功能。主要特性包括支持同步/异步语音合成、可调节语速(-10到10)和音量控制(0-100%),同时支持将合成语音保存为WAV文件,并自动处理特殊字符转义,设计上也确保了线程安全。该接口依赖于Windows系统(需.NET Framework支持)、PowerShell 5.1及以上版本,以及C++11或更高版本。完整代码在文章末尾提供。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
// Minimal usage example: configure the voice, speak synchronously and
// asynchronously, then render speech to a WAV file.
#include "tts.hpp"

int main() {
    TTS::TextToSpeech tts;

    // Configure the voice parameters.
    tts.set_rate(5);     // speed up speech
    tts.set_volume(80);  // 80% volume

    // Synchronous playback.
    tts.speak_sync("Hello, welcome to the text-to-speech system.");

    // Asynchronous playback.
    auto future = tts.speak_async("This is an async operation.");
    future.wait();  // wait for completion

    // Save to a file.
    std::string filename = tts.save_to_wav("Audio saved to file.");

    return 0;
}
语音参数设置
语速控制 (set_rate())
1 |
// Sets the speech rate.
void set_rate(int rate); // range: -10 ~ 10
音量控制 (set_volume())
1 |
// Sets the output volume.
void set_volume(int volume); // range: 0 ~ 100
同步朗读 (speak_sync())
1 |
// Speaks `text` and blocks until playback has finished; the returned flag
// reports whether the synthesis command succeeded.
bool speak_sync(const std::string& text);
示例:
1 2 3 |
// speak_sync() returns false when synthesis fails.
if (!tts.speak_sync("Critical system alert!")) {
    // handle the error
}
异步朗读 (speak_async())
1 |
// Starts speaking `text` in the background and returns immediately; the
// future yields the same success flag that speak_sync() would return.
std::future<bool> speak_async(const std::string& text);
1 2 3 4 5 6 7 8 9 |
auto future = tts.speak_async("Processing completed");

// Option 1: block until the operation finishes.
future.wait();

// Option 2: poll for completion.
// (The `100ms` literal requires `using namespace std::chrono_literals;`, C++14.)
while (future.wait_for(100ms) != std::future_status::ready) {
    // do other work here
}

// Retrieve the result.
bool success = future.get();
保存音频文件 (save_to_wav())
1 2 |
// Synthesizes `text` into a WAV file and returns the file path; an empty
// string is returned when synthesis fails. `filename` is optional.
std::string save_to_wav(const std::string& text,
                        const std::string& filename = "");
示例:
1 2 3 4 5 |
// 自动生成临时文件 auto auto_file = tts.save_to_wav("Automatic filename"); // 自定义路径 std::string custom_path = R"(C:\audio\alert.wav)"; auto custom_file = tts.save_to_wav("Custom path", custom_path); |
1 2 3 4 5 6 7 8 9 |
std::vector<std::future<bool>> batch_process() { TTS::TextToSpeech tts; std::vector<std::future<bool>> results; for (int i = 0; i < 10; ++i) { std::string text = "Message " + std::to_string(i); results.push_back(tts.speak_async(text)); } return results; } |
1 2 3 4 5 6 7 8 9 10 |
// Starts a long-running asynchronous synthesis and reports its progress from
// a background thread once per second until it completes.
void monitor_async() {
    // A shared_future is copyable, so the monitor thread owns its own handle
    // to the result. Capturing a local std::future by reference would leave
    // the detached thread with a dangling reference once this function returns.
    std::shared_future<bool> future =
        tts.speak_async("Long running operation").share();

    std::thread monitor([future] {
        while (future.wait_for(std::chrono::seconds(1)) != std::future_status::ready) {
            std::cout << "Synthesizing..." << std::endl;
        }
        std::cout << "Completed with status: " << future.get() << std::endl;
    });

    // NOTE(review): the thread is detached, so nothing joins it before program
    // exit; `tts` must stay alive until the operation has completed.
    monitor.detach();
}
1 2 3 4 5 6 7 |
// Returns a copy of `raw` containing only the characters that std::isprint
// classifies as printable (control characters such as '\t' and '\n' are
// removed).
//
// NOTE(review): in the default "C" locale only 0x20-0x7E are printable, so
// the bytes of multi-byte UTF-8 text are dropped as well — confirm that this
// is acceptable for non-ASCII input.
std::string sanitize_input(const std::string& raw) {
    std::string filtered;
    filtered.reserve(raw.size());
    std::copy_if(raw.begin(), raw.end(), std::back_inserter(filtered),
                 [](char c) {
                     // <cctype> functions have undefined behaviour for
                     // negative values, so convert to unsigned char first.
                     return std::isprint(static_cast<unsigned char>(c)) != 0;
                 });
    return filtered;
}
1 2 3 4 5 |
// 错误示例(对象提前销毁): auto future = TTS::TextToSpeech().speak_async("text"); // 正确做法: auto tts = std::make_shared<TTS::TextToSpeech>(); auto future = tts->speak_async("text"); |
检查返回值:
1 2 3 |
// Report a failed synthesis on standard error.
if (!tts.speak_sync("text")) {
    std::cerr << "Speech synthesis failed" << std::endl;
}
常见错误原因:临时批处理文件无法创建(例如临时目录不可写),或 PowerShell 命令以非零退出码结束(此时 speak_sync() 返回 false)。
Q:支持哪些音频格式?
A:目前仅支持WAV格式,由系统API决定
Q:如何处理中文字符?
A:需确保:传入的文本为 UTF-8 编码(批处理文件中通过 chcp 65001 切换到 UTF-8 代码页),并且系统已安装可朗读中文的语音。
Q:为什么需要生成批处理文件?
A:为了解决:命令行的编码问题(在批处理文件中先执行 chcp 65001 切换到 UTF-8 代码页),以及通过 std::system 直接传递带引号的长命令时的转义问题。
Q:最大支持文本长度?
A:由系统限制决定。由于合成命令会作为单行写入批处理文件,而 cmd.exe 的单行命令长度上限为 8191 个字符,实际可用长度远小于 1MB,建议将长文本分段处理。
Q:如何实现语音中断?
A:当前版本未实现。注意:销毁对象并不会终止已经启动的异步朗读(PowerShell 进程会继续运行),并且如上文所述,应避免在异步操作完成之前销毁对象。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
#pragma once

#include <string>
#include <sstream>
#include <cstdlib>
#include <random>
#include <atomic>
#include <thread>
#include <memory>
#include <system_error>
#include <future>
#include <fstream>
#include <cstdio>

#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif

namespace TTS {

/// Text-to-speech front end that drives the .NET
/// System.Speech.Synthesis.SpeechSynthesizer through PowerShell.
///
/// Every request is rendered into a one-line PowerShell command, written to a
/// temporary batch file (which first switches the console to code page 65001)
/// and executed with std::system().
///
/// Thread-safety: set_rate()/set_volume() are not synchronized with the
/// speak/save calls; configure the object from the thread that issues
/// requests. speak_async() snapshots the settings before it returns, so the
/// background task never touches the object.
class TextToSpeech {
public:
    static constexpr int MIN_RATE = -10;
    static constexpr int MAX_RATE = 10;
    static constexpr int MIN_VOLUME = 0;
    static constexpr int MAX_VOLUME = 100;

    explicit TextToSpeech() = default;

    /// Sets the speech rate; values outside [MIN_RATE, MAX_RATE] are clamped.
    void set_rate(int rate) {
        rate_ = clamp(rate, MIN_RATE, MAX_RATE);
    }

    /// Sets the volume; values outside [MIN_VOLUME, MAX_VOLUME] are clamped.
    void set_volume(int volume) {
        volume_ = clamp(volume, MIN_VOLUME, MAX_VOLUME);
    }

    /// Speaks @p text on the default audio device and blocks until finished.
    /// @return true when the synthesis command exited with status 0.
    bool speak_sync(const std::string& text) {
        return execute_command(generate_ps_command(text));
    }

    /// Speaks @p text in the background and returns immediately.
    ///
    /// The command is built here, on the calling thread, and the task only
    /// captures that string. The task therefore stays valid even if this
    /// object is destroyed before the speech has finished.
    /// @return a future holding the same flag speak_sync() would return.
    std::future<bool> speak_async(const std::string& text) {
        const std::string command = generate_ps_command(text);
        return std::async(std::launch::async, [command] {
            return execute_command(command);
        });
    }

    /// Renders @p text into a WAV file.
    /// @param text     Text to synthesize.
    /// @param filename Destination path; when empty, a unique "tts_*.wav"
    ///                 file is created in the temporary directory.
    /// @return the path of the written file, or "" when synthesis failed.
    std::string save_to_wav(const std::string& text,
                            const std::string& filename = "") {
        const bool auto_named = filename.empty();
        const std::string full_path =
            auto_named ? generate_temp_path("tts_", ".wav") : filename;

        if (!execute_command(generate_ps_command(text, full_path))) {
            // Only remove files this class named itself; a caller-supplied
            // path is left untouched.
            if (auto_named) std::remove(full_path.c_str());
            return "";
        }
        return full_path;
    }

private:
    int rate_ = 0;      // default speech rate
    int volume_ = 100;  // default volume
    std::atomic<bool> cancel_flag_{false};  // not read anywhere in this class

    /// Builds the PowerShell command line for one request. When
    /// @p output_file is empty the speech goes to the default audio device,
    /// otherwise to that WAV file.
    std::string generate_ps_command(const std::string& text,
                                    const std::string& output_file = "") const {
        std::ostringstream oss;
        oss << "powershell -Command \"";
        oss << "Add-Type -AssemblyName System.Speech; ";
        oss << "$speech = New-Object System.Speech.Synthesis.SpeechSynthesizer; ";
        oss << "$speech.Rate = " << rate_ << "; ";
        oss << "$speech.Volume = " << volume_ << "; ";
        if (!output_file.empty()) {
            // The path sits inside a single-quoted PowerShell string, so any
            // apostrophes in it must be doubled.
            oss << "$speech.SetOutputToWaveFile('" << escape_ps_string(output_file) << "'); ";
        } else {
            oss << "$speech.SetOutputToDefaultAudioDevice(); ";
        }
        // Entity-escaping keeps raw '"', '<', '>' and '&' out of the
        // double-quoted command line.
        // NOTE(review): Speak() receives the entity-escaped text — confirm
        // the engine renders entities as intended.
        oss << "$speech.Speak([System.Xml.XmlConvert]::VerifyXmlChars('"
            << escape_ps_string(escape_xml(text)) << "'));\"";
        return oss.str();
    }

    /// Escapes @p text for a single-quoted PowerShell string (' becomes '').
    static std::string escape_ps_string(const std::string& text) {
        std::string result;
        result.reserve(text.size() * 2);
        for (char c : text) {
            if (c == '\'') {
                result += "''";
            } else {
                result += c;
            }
        }
        return result;
    }

    /// Makes @p line safe to embed as one line of a batch file: '%' is
    /// doubled so cmd does not treat it as variable expansion, and CR/LF
    /// become spaces so the command cannot be split across lines.
    static std::string escape_batch_line(const std::string& line) {
        std::string result;
        result.reserve(line.size());
        for (char c : line) {
            if (c == '%') {
                result += "%%";
            } else if (c == '\r' || c == '\n') {
                result += ' ';
            } else {
                result += c;
            }
        }
        return result;
    }

    /// Writes @p command into a temporary batch file, runs it through
    /// `cmd /c`, deletes the file, and reports whether the exit status was 0.
    static bool execute_command(const std::string& command) {
        const std::string bat_path = generate_temp_path("tts_", ".bat");

        std::ofstream bat_file(bat_path);
        if (!bat_file) return false;
        bat_file << "@echo off\n"
                 << "chcp 65001 > nul\n"
                 << escape_batch_line(command) << "\n"
                 << "exit /b %ERRORLEVEL%";
        bat_file.close();
        if (!bat_file) {
            // The script could not be written completely; do not run it.
            std::remove(bat_path.c_str());
            return false;
        }

        const std::string cmd = "cmd /c \"" + bat_path + "\"";
        const int result = std::system(cmd.c_str());

        std::remove(bat_path.c_str());
        return result == 0;
    }

    /// Returns a path in the temporary directory of the form
    /// <prefix><8 hex digits><extension> that did not exist when checked.
    static std::string generate_temp_path(const std::string& prefix = "tts_",
                                          const std::string& extension = "") {
#ifdef _WIN32
        const char separator = '\\';
#else
        const char separator = '/';
#endif
        static const char hex_digits[] = "0123456789abcdef";
        // One generator per thread: speak_async() tasks call this
        // concurrently and std::mt19937 is not safe to share.
        thread_local std::mt19937 gen(std::random_device{}());
        std::uniform_int_distribution<int> dis(0, 15);

        const std::string temp_dir = get_temp_directory();
        std::string full_path;
        do {
            std::string unique_part;
            for (int i = 0; i < 8; ++i) {
                unique_part += hex_digits[dis(gen)];
            }
            full_path = temp_dir + separator + prefix + unique_part + extension;
        } while (file_exists(full_path));
        return full_path;
    }

    /// Replaces the five XML special characters with their named entities.
    static std::string escape_xml(const std::string& data) {
        std::string buffer;
        buffer.reserve(data.size());
        for (char c : data) {
            switch (c) {
                case '&':  buffer += "&amp;";  break;
                case '\"': buffer += "&quot;"; break;
                case '\'': buffer += "&apos;"; break;
                case '<':  buffer += "&lt;";   break;
                case '>':  buffer += "&gt;";   break;
                default:   buffer += c;        break;
            }
        }
        return buffer;
    }

    /// Limits @p value to the closed range [min, max].
    template <typename T>
    static T clamp(T value, T min, T max) {
        return (value < min) ? min : (value > max) ? max : value;
    }

    /// Returns %TEMP%, else %TMP%, else the current directory (".").
    static std::string get_temp_directory() {
        const char* tmp = std::getenv("TEMP");
        if (!tmp) tmp = std::getenv("TMP");
        return tmp ? tmp : ".";
    }

    /// Returns true when @p path exists.
    static bool file_exists(const std::string& path) {
#ifdef _WIN32
        return ::_access(path.c_str(), 0) == 0;
#else
        return ::access(path.c_str(), F_OK) == 0;
#endif
    }
};

} // namespace TTS