Reference links:

Text-to-speech API reference (REST) - Speech service - Azure Cognitive Services | Microsoft Learn

cognitive-services-speech-sdk/long_form_text_synthesis.py at 34ba838dd06cc9bb07b1441984265e5859944550 · Azure-Samples/cognitive-services-speech-sdk · GitHub

cognitive-services-speech-sdk/speech_synthesis_sample.py at 34ba838dd06cc9bb07b1441984265e5859944550 · Azure-Samples/cognitive-services-speech-sdk · GitHub

long_form_text_synthesis.py: splits long plain text (or a multi-voice SSML document) into sentences, synthesizes them in parallel on a pool of synthesizers, and stitches the results into a single MP3 plus word/sentence boundary metadata.

#!/usr/bin/env python
# coding: utf-8

# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

import copy
import json
import logging
import time
import xml.etree.ElementTree as ET
from multiprocessing.pool import ThreadPool
from pathlib import Path
from typing import List, Optional, Tuple

import azure.cognitiveservices.speech as speechsdk
import nltk
from nltk.tokenize import sent_tokenize
from tqdm import tqdm

from synthesizer_pool import SynthesizerPool

# Download the sentence tokenizer data (only needed on the first run)
nltk.download('punkt')

logger = logging.getLogger(__name__)


class LongTextSynthesizer:
    def __init__(self, subscription: str, region: str, language: str = 'english',
                 voice: str = 'en-US-JennyNeural', parallel_threads: int = 8) -> None:
        self.is_ssml = None
        self.subscription = subscription
        self.region = region
        self.language = language
        self.voice = voice
        self.parallel_threads = parallel_threads
        self.synthesizer_pool = SynthesizerPool(self._create_synthesizer, self.parallel_threads)

    def _create_synthesizer(self) -> speechsdk.SpeechSynthesizer:
        config = speechsdk.SpeechConfig(subscription=self.subscription, region=self.region)
        # 24 kHz / 48 kbps mono MP3; the offset arithmetic in synthesize_text
        # relies on this constant bitrate.
        config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Audio24Khz48KBitRateMonoMp3)
        config.set_property(
            speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary, 'true')
        config.speech_synthesis_voice_name = self.voice
        # audio_config=None: collect audio bytes from the result instead of playing them.
        return speechsdk.SpeechSynthesizer(config, audio_config=None)

    def synthesize_text_once(self, text: str) -> Tuple[speechsdk.SpeechSynthesisResult,
                                                       List[speechsdk.SpeechSynthesisWordBoundaryEventArgs]]:
        logger.debug("Synthesis started %s", text)
        text_boundaries = []
        finished = []

        def word_boundary_cb(evt: speechsdk.SpeechSynthesisWordBoundaryEventArgs) -> None:
            text_boundaries.append(evt)

        with self.synthesizer_pool.borrow_synthesizer() as synthesizer:
            synthesizer.synthesis_word_boundary.connect(word_boundary_cb)
            synthesizer.synthesis_completed.connect(lambda _: finished.append(True))
            synthesizer.synthesis_canceled.connect(lambda _: finished.append(True))
            for _ in range(3):  # up to 3 attempts on transient failures
                # Rebind to fresh lists; the callbacks close over these names,
                # so each attempt collects only its own events.
                text_boundaries = []
                finished = []
                result = synthesizer.speak_ssml_async(text).get() if self.is_ssml else \
                    synthesizer.speak_text_async(text).get()
                if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
                    logger.debug("Synthesis completed %s", text)
                    # Wait for the completed event so that every word-boundary
                    # callback has been delivered before returning.
                    while not finished:
                        time.sleep(0.1)
                    return result, text_boundaries
                elif result.reason == speechsdk.ResultReason.Canceled:
                    cancellation_details = result.cancellation_details
                    logger.warning("Synthesis canceled, error details %s", cancellation_details.error_details)
                    if cancellation_details.error_code in \
                        [speechsdk.CancellationErrorCode.ConnectionFailure,
                         speechsdk.CancellationErrorCode.ServiceUnavailable,
                         speechsdk.CancellationErrorCode.ServiceTimeout]:
                        logger.info("Synthesis canceled with connection failure, retrying.")
                        continue
                    break
            logger.error("Synthesizer failed to synthesize text")
            return None, None

    def synthesize_text(self, text: Optional[str] = None, ssml_path: Optional[Path] = None,
                        output_path: Path = Path.cwd()) -> None:
        output_path.mkdir(parents=True, exist_ok=True)
        all_word_boundaries, all_sentence_boundaries = [], []
        if text is not None:
            sentences = self.split_text(text)
            self.is_ssml = False
        elif ssml_path is not None:
            sentences = self.read_and_split_ssml(ssml_path)
            self.is_ssml = True
        else:
            raise ValueError('Either text or ssml_path must be provided')
        offset = 0  # running offset of already-written audio, in milliseconds
        with ThreadPool(processes=self.parallel_threads) as pool:
            audio_path = output_path / 'audio.mp3'
            with audio_path.open("wb") as f:
                for result, text_boundaries in tqdm(
                        pool.imap(self.synthesize_text_once, sentences), total=len(sentences)):
                    if result is not None:
                        f.write(result.audio_data)
                        for text_boundary in text_boundaries:
                            text_boundary_dict = {
                                # audio_offset is reported in 100-ns ticks; /10000 gives milliseconds
                                'audio_offset': offset + text_boundary.audio_offset / 10000,
                                'duration': text_boundary.duration.total_seconds() * 1000,
                                'text': text_boundary.text
                            }
                            if text_boundary.boundary_type == speechsdk.SpeechSynthesisBoundaryType.Sentence:
                                all_sentence_boundaries.append(text_boundary_dict)
                            else:
                                all_word_boundaries.append(text_boundary_dict)
                        # Advance the running offset by this sentence's duration:
                        # 48 kbps MP3 is 48/8 = 6 bytes per millisecond.
                        offset += len(result.audio_data) / (48 / 8)
            with (output_path / "word_boundaries.json").open("w", encoding="utf-8") as f:
                json.dump(all_word_boundaries, f, indent=4, ensure_ascii=False)
            with (output_path / "sentence_boundaries.json").open("w", encoding="utf-8") as f:
                json.dump(all_sentence_boundaries, f, indent=4, ensure_ascii=False)

    def split_text(self, text: str) -> List[str]:
        sentences = sent_tokenize(text, language=self.language)
        logger.info('Splitting into %d sentences', len(sentences))
        logger.debug(sentences)
        return sentences

    @staticmethod
    def read_and_split_ssml(ssml_path: Path) -> List[str]:
        # Collect and re-register the document's namespaces so they are
        # preserved when the split documents are serialized back to strings.
        namespaces = dict([node for _, node in ET.iterparse(ssml_path, events=['start-ns'])])
        for ns in namespaces:
            ET.register_namespace(ns, namespaces[ns])
        root = ET.parse(ssml_path).getroot()
        sentences = []
        # Build an empty <speak> template by stripping every <voice> child.
        speak_element = copy.deepcopy(root)

        for child in list(speak_element):
            _, _, tag = child.tag.rpartition('}')
            if tag != 'voice':
                raise ValueError(f'Only voice element is supported, got {tag}')
            speak_element.remove(child)
        # Wrap each <voice> element in its own copy of the template so it can
        # be synthesized independently.
        for child in root:
            single_voice = copy.deepcopy(speak_element)
            single_voice.append(child)
            sentences.append(ET.tostring(single_voice, encoding='unicode'))
        return sentences


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    s = LongTextSynthesizer(subscription="YourSubscriptionKey", region="YourServiceRegion")
    with Path('./Gatsby-chapter1.txt').open('r', encoding='utf-8') as r:
        s.synthesize_text(r.read(), output_path=Path('./gatsby'))
    s.synthesize_text(ssml_path=Path('multi-role.xml'), output_path=Path('./multi-role'))
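
synthesizer_pool.py (sketch)

The script above imports SynthesizerPool from a companion module that this post does not reproduce. Below is a minimal sketch of what such a module could look like, assuming a queue-backed borrow/return pattern; the disconnect_all calls are an assumption about how the pool keeps one borrower's callbacks from leaking into the next. Refer to the sample repository linked above for the actual implementation.

# synthesizer_pool.py -- minimal sketch, not the sample's actual module.
import queue
from contextlib import contextmanager


class SynthesizerPool:
    def __init__(self, synthesizer_factory, pool_size: int) -> None:
        # Pre-create all synthesizers up front so borrowing never blocks on
        # connection setup.
        self._pool = queue.Queue()
        for _ in range(pool_size):
            self._pool.put(synthesizer_factory())

    @contextmanager
    def borrow_synthesizer(self):
        # Block until a synthesizer is free; hand it out, then return it.
        synthesizer = self._pool.get()
        try:
            yield synthesizer
        finally:
            # Assumption: detach whatever callbacks the borrower connected,
            # so the next borrower starts from a clean slate.
            synthesizer.synthesis_word_boundary.disconnect_all()
            synthesizer.synthesis_completed.disconnect_all()
            synthesizer.synthesis_canceled.disconnect_all()
            self._pool.put(synthesizer)

Pre-creating the synthesizers and reusing their connections is what keeps the per-sentence, parallel synthesis above cheap: each borrowed synthesizer already holds an open connection to the service.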

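multi-role.xml (example shape)

read_and_split_ssml expects the SSML root to contain only <voice> children, one per speaking turn; it rewraps each turn in its own standalone <speak> document. The multi-role.xml referenced in __main__ is not included in the post; a hypothetical file with the shape the code expects might look like this:

<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US">
    <voice name="en-US-JennyNeural">Hello! How can I help you today?</voice>
    <voice name="en-US-GuyNeural">I would like to book a table for two, please.</voice>
</speak>

Given that input, read_and_split_ssml returns two standalone <speak> strings, one per voice element, each carrying the original namespace declarations, and they are then synthesized in parallel just like plain-text sentences.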