C#教程
主页 > 软件编程 > C#教程 >

C/C++ Windows SAPI实现文字转语音功能

2025-02-06 | 佚名 | 点击:

本文通过封装Windows SAPI(Speech Application Programming Interface),提供了一个现代化的C++接口实现文字转语音功能。主要特性包括支持同步/异步语音合成、可调节语速(-10到10)和音量控制(0-100%),同时支持将合成语音保存为WAV文件,并自动处理特殊字符转义,设计上也确保了线程安全。该接口依赖于Windows系统(需.NET Framework支持)、PowerShell 5.1及以上版本,以及C++11或更高版本。完整代码在文末提供。

快速开始

基础使用示例

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

#include "tts.hpp"

int main() {

    TTS::TextToSpeech tts;

    // 设置语音参数

    tts.set_rate(5);    // 加快语速

    tts.set_volume(80); // 80%音量

    // 同步朗读

    tts.speak_sync("Hello, welcome to the text-to-speech system.");

    // 异步朗读

    auto future = tts.speak_async("This is an async operation.");

    future.wait(); // 等待完成

    // 保存到文件

    std::string filename = tts.save_to_wav("Audio saved to file.");

    return 0;

}

核心功能详解

语音参数设置

语速控制 (set_rate())

1

void set_rate(int rate);  // valid range: -10 to 10 (values outside are clamped)

音量控制 (set_volume())

1

void set_volume(int volume);  // valid range: 0 to 100 (values outside are clamped)

同步朗读 (speak_sync())

1

// Speaks `text` and blocks until the speech command has finished.
// Returns true only when the command exits with status 0.
bool speak_sync(const std::string& text);

示例:

1

2

3

if (!tts.speak_sync("Critical system alert!")) {

    // handle the failure here (speak_sync returned false)

}

异步朗读 (speak_async())

1

// Runs speak_sync(text) on a std::async(std::launch::async) task and returns
// its future; the future's value is speak_sync's result.
std::future<bool> speak_async(const std::string& text);

1

2

3

4

5

6

7

8

9

// Start speaking in the background; the future resolves to speak_sync's result.
auto future = tts.speak_async("Processing completed");

// Option 1: block until the speech has finished.
future.wait();

// Option 2: poll, doing other work between checks.
// The duration is spelled out so the snippet compiles without a
// `using namespace std::chrono_literals;` directive (needed for `100ms`).
while (future.wait_for(std::chrono::milliseconds(100)) != std::future_status::ready) {
    // do other work here
}

// Fetch the result.
bool success = future.get();

保存音频文件 (save_to_wav())

1

2

// Synthesizes `text` into a WAV file. Returns the path of the written file,
// or an empty string when the speech command fails.
std::string save_to_wav(const std::string& text,

                       const std::string& filename = "");

示例:

1

2

3

4

5

// Automatically generated temporary file

auto auto_file = tts.save_to_wav("Automatic filename");

// Custom path

std::string custom_path = R"(C:\audio\alert.wav)";

auto custom_file = tts.save_to_wav("Custom path", custom_path);

高级用法

批量语音生成

1

2

3

4

5

6

7

8

9

std::vector<std::future<bool>> batch_process() {

    TTS::TextToSpeech tts;

    std::vector<std::future<bool>> results;

    for (int i = 0; i < 10; ++i) {

        std::string text = "Message " + std::to_string(i);

        results.push_back(tts.speak_async(text));

    }

    return results;

}

实时进度跟踪

1

2

3

4

5

6

7

8

9

10

void monitor_async() {

    auto future = tts.speak_async("Long running operation");

    std::thread monitor([&future]{

        while (future.wait_for(1s) != std::future_status::ready) {

            std::cout << "Synthesizing..." << std::endl;

        }

        std::cout << "Completed with status: " << future.get() << std::endl;

    });

    monitor.detach();

}

注意事项与最佳实践

字符处理

1

2

3

4

5

6

7

// Returns a copy of `raw` without ASCII control characters.
//
// Bytes >= 0x80 are kept: in UTF-8 they are always part of a multi-byte
// character (e.g. Chinese text), and dropping them would destroy the text.
// Each byte is converted to unsigned char before calling std::isprint,
// because passing a negative char value to it is undefined behaviour.
std::string sanitize_input(const std::string& raw) {
    std::string filtered;
    filtered.reserve(raw.size());

    for (const char c : raw) {
        const unsigned char byte = static_cast<unsigned char>(c);
        if (byte >= 0x80 || std::isprint(byte)) {
            filtered += c;
        }
    }

    return filtered;
}

性能优化

1

2

3

4

5

// Wrong (object destroyed too early): the temporary TextToSpeech dies at the
// end of this statement while the task started by speak_async still uses it.

auto future = TTS::TextToSpeech().speak_async("text");

// Correct: keep the object alive for as long as the future is in use.

auto tts = std::make_shared<TTS::TextToSpeech>();

auto future = tts->speak_async("text");

错误处理

检查返回值:

1

2

3

// speak_sync returns false when the temporary batch file cannot be created
// or the speech command exits with a non-zero status.
if (!tts.speak_sync("text")) {

    std::cerr << "Speech synthesis failed" << std::endl;

}

常见错误原因:临时批处理文件无法创建,或PowerShell命令以非零退出码结束(这两种情况下 speak_sync() 都会返回 false)。

常见问题解答

Q:支持哪些音频格式?
A:目前仅支持WAV格式,由系统API决定

Q:如何处理中文字符?
A:需确保传入的文本为UTF-8编码——生成的批处理文件会先执行 chcp 65001 切换到UTF-8代码页,再运行PowerShell命令。

Q:为什么需要生成批处理文件?
A:为了在同一个cmd会话中先执行 chcp 65001 切换到UTF-8代码页,再运行PowerShell命令,并把该命令的退出码原样返回给调用方。

Q:最大支持文本长度?
A:由系统限制决定。文本会被嵌入批处理文件中的一条PowerShell命令行,而cmd.exe单条命令行最长约8191个字符,因此较长的文本建议分段处理。

Q:如何实现语音中断?
A:当前版本未实现。注意:销毁对象并不能终止异步操作——speak_async() 启动的任务仍持有对象的 this 指针,在任务结束前销毁对象会导致未定义行为(参见上文“错误示例”)。

TTS.hpp 源代码

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

#pragma once

#include <string>

#include <sstream>

#include <cstdlib>

#include <random>

#include <atomic>

#include <thread>

#include <memory>

#include <system_error>

#include <future>

#include <fstream>

#include <cstdio>

#ifdef _WIN32

#include <io.h>

#else

#include <unistd.h>

#endif

namespace TTS {

// Text-to-speech front end that drives .NET's System.Speech through PowerShell.
//
// Every request builds a PowerShell command, writes it to a temporary .bat
// file (which first switches the console to code page 65001), runs that file
// with std::system and deletes it again. Text is expected to be UTF-8.
//
// Rate and volume are atomics, so the setters may be called while
// asynchronous speech is running; a request uses the values that are current
// when its command is built.
//
// Lifetime: speak_async() captures `this`. The object must stay alive until
// every future it returned has become ready.
class TextToSpeech {
public:
    static constexpr int MIN_RATE = -10;
    static constexpr int MAX_RATE = 10;
    static constexpr int MIN_VOLUME = 0;
    static constexpr int MAX_VOLUME = 100;

    explicit TextToSpeech() = default;

    // Sets the speaking rate; values outside [MIN_RATE, MAX_RATE] are clamped.
    void set_rate(int rate) {
        rate_ = clamp(rate, MIN_RATE, MAX_RATE);
    }

    // Sets the volume; values outside [MIN_VOLUME, MAX_VOLUME] are clamped.
    void set_volume(int volume) {
        volume_ = clamp(volume, MIN_VOLUME, MAX_VOLUME);
    }

    // Speaks `text` on the default audio device and blocks until the command
    // has finished. Returns true only when the command exits with status 0.
    bool speak_sync(const std::string& text) {
        return execute_command(generate_ps_command(text));
    }

    // Runs speak_sync(text) on a std::async(std::launch::async) task and
    // returns its future. The task holds `this`; see the class comment.
    std::future<bool> speak_async(const std::string& text) {
        return std::async(std::launch::async, [this, text] { return this->speak_sync(text); });
    }

    // Synthesizes `text` into a WAV file.
    //
    // filename: path of the file to write. When empty, a unique
    //           "tts_<hex>.wav" path in the temp directory is generated.
    // Returns the path that was written, or "" on failure. A failed
    // auto-generated file is removed; a caller-supplied path is left alone.
    std::string save_to_wav(const std::string& text, const std::string& filename = "") {
        const bool auto_named = filename.empty();
        // A caller-supplied name is used verbatim. It used to be passed on as
        // a temp-file *prefix*, which turned "C:\audio\alert.wav" into
        // "<temp>\C:\audio\alert.wav<hex>.wav".
        const std::string full_path =
            auto_named ? generate_temp_path("tts_", ".wav") : filename;

        // A double quote or line break would end the quoted command line in
        // the batch file, so such a path is rejected before anything is run.
        if (full_path.find_first_of("\"\r\n") != std::string::npos) {
            return "";
        }

        if (!execute_command(generate_ps_command(text, full_path))) {
            if (auto_named) std::remove(full_path.c_str());
            return "";
        }

        return full_path;
    }

private:
    std::atomic<int> rate_{0};      // default rate
    std::atomic<int> volume_{100};  // default volume

    // Builds the complete `powershell -Command "..."` line.
    // An empty output_file selects the default audio device; otherwise the
    // speech is written to that WAV file.
    std::string generate_ps_command(const std::string& text, const std::string& output_file = "") const {
        std::ostringstream oss;
        oss << "powershell -Command \"";
        oss << "Add-Type -AssemblyName System.Speech; ";
        oss << "$speech = New-Object System.Speech.Synthesis.SpeechSynthesizer; ";
        oss << "$speech.Rate = " << rate_.load() << "; ";
        oss << "$speech.Volume = " << volume_.load() << "; ";

        if (!output_file.empty()) {
            // The path sits inside a single-quoted PowerShell string, so any
            // quote in it has to be doubled.
            oss << "$speech.SetOutputToWaveFile('" << escape_ps_string(output_file) << "'); ";
        } else {
            oss << "$speech.SetOutputToDefaultAudioDevice(); ";
        }

        // escape_xml() serves as a transport encoding: it keeps quotes,
        // ampersands and angle brackets out of the command line. Speak(string)
        // takes plain text, not SSML, so the entities are decoded again inside
        // PowerShell (HtmlDecode); otherwise "&apos;" would be passed on as-is.
        oss << "$speech.Speak([System.Net.WebUtility]::HtmlDecode("
            << "[System.Xml.XmlConvert]::VerifyXmlChars('"
            << escape_ps_string(escape_xml(flatten_line_breaks(text)))
            << "')));\"";

        return oss.str();
    }

    // Escapes `text` for use inside a single-quoted PowerShell string by
    // doubling every character PowerShell accepts as a single quote: ASCII '
    // and U+2018..U+201B (UTF-8 bytes E2 80 98 .. E2 80 9B).
    std::string escape_ps_string(const std::string& text) const {
        std::string result;
        result.reserve(text.size() * 2);

        for (std::size_t i = 0; i < text.size(); ++i) {
            const unsigned char lead = static_cast<unsigned char>(text[i]);

            if (lead == '\'') {
                result += "''";
                continue;
            }

            if (lead == 0xE2 && i + 2 < text.size()) {
                const unsigned char b1 = static_cast<unsigned char>(text[i + 1]);
                const unsigned char b2 = static_cast<unsigned char>(text[i + 2]);
                if (b1 == 0x80 && b2 >= 0x98 && b2 <= 0x9B) {
                    const std::string quote = text.substr(i, 3);
                    result += quote;
                    result += quote;
                    i += 2;  // the loop increment skips the third byte
                    continue;
                }
            }

            result += text[i];
        }

        return result;
    }

    // Writes `command` to a temporary batch file, runs it through cmd and
    // removes the file. Returns true when the file could be written and the
    // command exited with status 0.
    bool execute_command(const std::string& command) const {
        const std::string bat_path = generate_temp_path("tts_", ".bat");

        std::ofstream bat_file(bat_path);
        if (!bat_file) return false;

        bat_file << "@echo off\n"
                 << "chcp 65001 > nul\n"
                 << escape_batch_percent(command) << "\n"
                 << "exit /b %ERRORLEVEL%";
        bat_file.close();

        if (!bat_file) {
            // Do not run a partially written script.
            std::remove(bat_path.c_str());
            return false;
        }

        const std::string cmd = "cmd /c \"" + bat_path + "\"";
        const int result = std::system(cmd.c_str());

        std::remove(bat_path.c_str());
        return result == 0;
    }

    // Returns "<temp dir>\<prefix><8 hex digits><extension>" for a file that
    // does not exist at the time of the check. The file itself is not created.
    std::string generate_temp_path(const std::string& prefix = "tts_", const std::string& extension = "") const {
        // One generator per thread: speak_async() tasks call this
        // concurrently, and std::mt19937 must not be shared unsynchronized.
        thread_local std::mt19937 gen(std::random_device{}());
        std::uniform_int_distribution<int> dis(0, 15);
        static const char hex_digits[] = "0123456789abcdef";

        const std::string temp_dir = get_temp_directory();
        std::string full_path;

        do {
            std::string unique_part;
            for (int i = 0; i < 8; ++i) {
                unique_part += hex_digits[dis(gen)];
            }
            full_path = temp_dir + "\\" + prefix + unique_part + extension;
        } while (file_exists(full_path));

        return full_path;
    }

    // Replaces the five XML special characters with their predefined entities.
    static std::string escape_xml(const std::string& data) {
        std::string buffer;
        buffer.reserve(data.size());

        for (const char c : data) {
            switch (c) {
                case '&':  buffer += "&amp;";  break;
                case '\"': buffer += "&quot;"; break;
                case '\'': buffer += "&apos;"; break;
                case '<':  buffer += "&lt;";   break;
                case '>':  buffer += "&gt;";   break;
                default:   buffer += c;        break;
            }
        }

        return buffer;
    }

    // Replaces CR and LF with spaces; a line break inside the text would
    // split the single command line written to the batch file.
    static std::string flatten_line_breaks(std::string data) {
        for (char& c : data) {
            if (c == '\r' || c == '\n') c = ' ';
        }
        return data;
    }

    // Doubles every '%' so the batch interpreter emits it literally instead
    // of treating it as the start of a %VARIABLE% expansion.
    static std::string escape_batch_percent(const std::string& line) {
        std::string result;
        result.reserve(line.size());

        for (const char c : line) {
            result += c;
            if (c == '%') result += '%';
        }

        return result;
    }

    // Limits `value` to the closed range [min, max].
    template <typename T>
    static T clamp(T value, T min, T max) {
        return (value < min) ? min : (value > max) ? max : value;
    }

    // Returns %TEMP%, else %TMP%, else "." when neither variable is set.
    static std::string get_temp_directory() {
        const char* tmp = std::getenv("TEMP");
        if (!tmp) tmp = std::getenv("TMP");
        return tmp ? tmp : ".";
    }

    // Reports whether `path` currently exists.
    static bool file_exists(const std::string& path) {
#ifdef _WIN32
        return ::_access(path.c_str(), 0) == 0;
#else
        return ::access(path.c_str(), F_OK) == 0;
#endif
    }
};

} // namespace TTS

原文链接:
相关文章
最新更新