[{"data":1,"prerenderedAt":504},["ShallowReactive",2],{"content-query-HtDUGM2rP2":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":498,"_id":499,"_source":500,"_file":501,"_stem":502,"_extension":503},"/technology-blogs/zh/3856","zh",false,"","昇思创新训练营优秀创新案例系列分享第一期：新闻稿情感播报","项目代码已开源，欢迎各位开发者体验。","2025-10-13","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/2cf8182ae9d948fa8d2091a061af1734.png","technology-blogs",{"type":14,"children":15,"toc":495},"root",[16,24,27,37,43,47,56,60,65,70,75,103,111,119,124,131,138,146,153,158,168,176,181,206,215,223,231,239,252,260,267,275,282,300,308,313,320,338,345,350,355,362,367,374,379,384,391,396,401,406,419,426,431,454,461,466,471,478,483,490],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"昇思创新训练营优秀创新案例系列分享第一期新闻稿情感播报",[22],{"type":23,"value":8},"text",{"type":17,"tag":18,"props":25,"children":26},{"id":7},[],{"type":17,"tag":18,"props":28,"children":30},{"id":29},"_1前言",[31],{"type":17,"tag":32,"props":33,"children":34},"strong",{},[35],{"type":23,"value":36},"1、前言",{"type":17,"tag":38,"props":39,"children":40},"p",{},[41],{"type":23,"value":42},"昇思MindSpore作为开源的AI框架，为开发人员带来端边云全场景协同、极简开发、极致性能的体验。为鼓励基于昇思MindSpore进行创新，昇思开源社区对昇思创新训练营优秀创新案例进行转载及解读。本篇文章主要介绍了开发者如何基于昇思MindSpore进行大模型微调及调用模型API服务，完成新闻检索-新闻稿内容情感分类-新闻稿带感情语音播报的全流程实践。项目代码已开源，欢迎各位开发者体验。",{"type":17,"tag":18,"props":44,"children":46},{"id":45},"_1",[],{"type":17,"tag":18,"props":48,"children":50},{"id":49},"_2项目意义和价值",[51],{"type":17,"tag":32,"props":52,"children":53},{},[54],{"type":23,"value":55},"2、项目意义和价值",{"type":17,"tag":18,"props":57,"children":59},{"id":58},"_2",[],{"type":17,"tag":38,"props":61,"children":62},{},[63],{"type":23,"value":64},"驾驶、通勤等“伴随式”场景，“听新闻”已成主流，但现有机器播报普遍存在一个核心缺陷：情感表达的缺失。无论是报道英雄事迹的振奋，还是播报自然灾害的沉重，现有系统均采用单一中立的语调，极大削弱了新闻的感染力，并阻碍了用户对信息背后完整意图的感知。",{"type":17,"tag":38,"props":66,"children":67},{},[68],{"type":23,"value":6
9},"本项目通过新闻检索-情感分类-语音播报的端到端流程实现带有感情语调的新闻播报，提升新闻的感染力，同时应用场景也可扩展至新媒体与有声读物、智能座舱与车载助手、教育与陪伴机器人、舆情监控与品牌管理等多种领域。",{"type":17,"tag":38,"props":71,"children":72},{},[73],{"type":23,"value":74},"本项目的创新性体现在从信息获取到情感表达的全流程覆盖。整个系统采用清晰的模块化架构（包括检索、理解和表达），确保了其高效性、可扩展性，并具备了未来端云部署的灵活性。",{"type":17,"tag":76,"props":77,"children":78},"ul",{},[79,85,90],{"type":17,"tag":80,"props":81,"children":82},"li",{},[83],{"type":23,"value":84},"项目名称：新闻情感播报",{"type":17,"tag":80,"props":86,"children":87},{},[88],{"type":23,"value":89},"团队成员：徐日晞（上海交通大学）、李子晗（上海应用技术大学）、任思宇（上海工程技术大学）",{"type":17,"tag":80,"props":91,"children":92},{},[93,95],{"type":23,"value":94},"项目代码链接: ",{"type":17,"tag":96,"props":97,"children":101},"a",{"href":98,"rel":99},"https://github.com/sunnyxrxrx/Emotional-News-Anchor",[100],"nofollow",[102],{"type":23,"value":98},{"type":17,"tag":38,"props":104,"children":105},{},[106],{"type":17,"tag":107,"props":108,"children":110},"img",{"alt":7,"src":109},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/ade6eacb059446cca1b761e5d22596bb.png",[],{"type":17,"tag":38,"props":112,"children":113},{},[114],{"type":17,"tag":32,"props":115,"children":116},{},[117],{"type":23,"value":118},"1、运行效果",{"type":17,"tag":38,"props":120,"children":121},{},[122],{"type":23,"value":123},"参见如下视频，可实现连续文字检索、情感分类、语音播报。",{"type":17,"tag":38,"props":125,"children":126},{},[127],{"type":17,"tag":107,"props":128,"children":130},{"alt":7,"src":129},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/b7ddcef3a9964858b6f7fe0c5a6bbd7d.png",[],{"type":17,"tag":38,"props":132,"children":133},{},[134],{"type":17,"tag":107,"props":135,"children":137},{"alt":7,"src":136},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/30885be88dec4a5693cba42ce3e5c60e.png",[],{"type":17,"tag":38,"props":139,"children":140},{},[141],{"type":17,"tag":32,"props":142,"children":143},{},[144],{"type":23,"value":145},"1、环境准备",{"type":17,"tag":38,"props":147,"childr
en":148},{},[149],{"type":17,"tag":107,"props":150,"children":152},{"alt":7,"src":151},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/e4ba2c00b5f34da3b5242839698670ec.png",[],{"type":17,"tag":38,"props":154,"children":155},{},[156],{"type":23,"value":157},"安装环境：",{"type":17,"tag":38,"props":159,"children":160},{},[161],{"type":17,"tag":162,"props":163,"children":165},"code",{"className":164},[],[166],{"type":23,"value":167},"bash setup.sh",{"type":17,"tag":38,"props":169,"children":170},{},[171],{"type":17,"tag":32,"props":172,"children":173},{},[174],{"type":23,"value":175},"2、数据集准备",{"type":17,"tag":38,"props":177,"children":178},{},[179],{"type":23,"value":180},"本项目选择DeepSeek-R1-Distill-Qwen-1.5B模型进行微调，可参考如下命令进行预训练权重的下载：",{"type":17,"tag":76,"props":182,"children":183},{},[184,196,201],{"type":17,"tag":80,"props":185,"children":186},{},[187,189],{"type":23,"value":188},"数据集下载：",{"type":17,"tag":96,"props":190,"children":193},{"href":191,"rel":192},"https://figshare.com/articles/dataset/THUCNews%5C_Chinese%5C_News%5C_Text%5C_Classification%5C_Dataset/28279964?file=51924092",[100],[194],{"type":23,"value":195},"https://figshare.com/articles/dataset/THUCNews\\_Chinese\\_News\\_Text\\_Classification\\_Dataset/28279964?file=51924092",{"type":17,"tag":80,"props":197,"children":198},{},[199],{"type":23,"value":200},"申请数据处理相关API（详情请参考项目仓库代码）",{"type":17,"tag":80,"props":202,"children":203},{},[204],{"type":23,"value":205},"数据集处理",{"type":17,"tag":207,"props":208,"children":210},"pre",{"code":209},"python create_label_dataset.py --llm_api_key xxx \n# --llm_api_key：你的大模型api_key\n# --input_dir：下载数据集的存放位置，默认\"./datasets\"\n# --output_dir：数据标注文件的存放位置，默认\"./datasets\"\n# 
--csv_name：标注文件名，默认\"news_emotion_labeled.csv\"\n",[211],{"type":17,"tag":162,"props":212,"children":213},{"__ignoreMap":7},[214],{"type":23,"value":209},{"type":17,"tag":38,"props":216,"children":217},{},[218],{"type":17,"tag":32,"props":219,"children":220},{},[221],{"type":23,"value":222},"3、模型微调",{"type":17,"tag":207,"props":224,"children":226},{"code":225},"python ft_deepseek_distill.py\n# --model：使用的基座模型，默认\"models/DeepSeek-R1-Distill-Qwen-1.5B\"\n# --input_dir：数据标注文件，默认\"./datasets/news_emotion_labeled.csv\"\n# --save_path：微调模型存放位置，默认\"./models/ft_model\"\n",[227],{"type":17,"tag":162,"props":228,"children":229},{"__ignoreMap":7},[230],{"type":23,"value":225},{"type":17,"tag":38,"props":232,"children":233},{},[234],{"type":17,"tag":32,"props":235,"children":236},{},[237],{"type":23,"value":238},"4、启动交互",{"type":17,"tag":76,"props":240,"children":241},{},[242,247],{"type":17,"tag":80,"props":243,"children":244},{},[245],{"type":23,"value":246},"申请新闻检索和语音播报相关API（详情请参考项目仓库代码）",{"type":17,"tag":80,"props":248,"children":249},{},[250],{"type":23,"value":251},"启动交互",{"type":17,"tag":207,"props":253,"children":255},{"code":254},"python main.py --news_search_api_key xxx --speaker_app_id xxx --speaker_api_key xxx --speaker_secret_key xxx --lora_adapter_path xxx \n# --news_search_api_key：新闻检索api key，需提供\n# --speaker_app_id，--speaker_api_key，--speaker_secret_key为语音播报api相关参数，需提供\n# --model：使用的基座模型，默认\"models/DeepSeek-R1-Distill-Qwen-1.5B\"\n# --lora_adapter_path：使用微调模型的权重路径，需提供\n# 
--output_dir：输出语音的存放位置，默认'output'\n",[256],{"type":17,"tag":162,"props":257,"children":258},{"__ignoreMap":7},[259],{"type":23,"value":254},{"type":17,"tag":38,"props":261,"children":262},{},[263],{"type":17,"tag":107,"props":264,"children":266},{"alt":7,"src":265},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/8c2267911d2f4e0e85d537b9c3648405.png",[],{"type":17,"tag":38,"props":268,"children":269},{},[270],{"type":17,"tag":32,"props":271,"children":272},{},[273],{"type":23,"value":274},"1、方案流程设计",{"type":17,"tag":38,"props":276,"children":277},{},[278],{"type":17,"tag":107,"props":279,"children":281},{"alt":7,"src":280},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/b2758f2b293d4cf4a379703c666c0681.png",[],{"type":17,"tag":76,"props":283,"children":284},{},[285,290,295],{"type":17,"tag":80,"props":286,"children":287},{},[288],{"type":23,"value":289},"新闻检索：系统具备主动信息获取能力，可以根据用户指令，通过API实时检索网络新闻。",{"type":17,"tag":80,"props":291,"children":292},{},[293],{"type":23,"value":294},"情感理解：获取新闻后，系统会基于昇思MindSpore微调后的模型，对文本进行深度的多维度情感分析，精准识别出喜悦、悲伤等九种复杂情绪。",{"type":17,"tag":80,"props":296,"children":297},{},[298],{"type":23,"value":299},"语音生成：分析得到的情感标签将驱动情感化TTS引擎，自动调整内容的音色、语速和语调，实现从“读稿”到“演绎新闻”的全新体验。",{"type":17,"tag":38,"props":301,"children":302},{},[303],{"type":17,"tag":32,"props":304,"children":305},{},[306],{"type":23,"value":307},"2、情感理解模块架构解析",{"type":17,"tag":38,"props":309,"children":310},{},[311],{"type":23,"value":312},"新闻检索和语音合成主要通过调用API完成，情感理解模块中，我们基于MindSpore进行模型微调，使得模型具备分辨新闻文本情感的能力，在此我们着重介绍情感理解模块的架构。",{"type":17,"tag":38,"props":314,"children":315},{},[316],{"type":17,"tag":107,"props":317,"children":319},{"alt":7,"src":318},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/3109be20bbdf47939e16f8a445478997.png",[],{"type":17,"tag":76,"props":321,"children":322},{},[323,328,333],{"type":17,"tag":80,"props":324,"children":325},{},[326],{"type":23,"value":327},"硬件层：依托华为云ModelArts平台
昇腾算力。",{"type":17,"tag":80,"props":329,"children":330},{},[331],{"type":23,"value":332},"AI框架层：昇思MindSpore框架提供了函数式+面向对象融合编程、动静统一、高效数据引擎、自动并行等能力， MindSpore NLP套件将这些优势特性与实际需求匹配，实现简便的模型开发，高效的数据预处理，以及预训练模型的快速调用。",{"type":17,"tag":80,"props":334,"children":335},{},[336],{"type":23,"value":337},"模型算法层：通过MindSpore NLP套件调用DeepSeek-R1-Distill-Qwen-1.5B模型，并基于标注好的新闻数据实现LoRA微调。",{"type":17,"tag":38,"props":339,"children":340},{},[341],{"type":17,"tag":107,"props":342,"children":344},{"alt":7,"src":343},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/c52e5c23c4a34116b01eae4632385ec8.png",[],{"type":17,"tag":38,"props":346,"children":347},{},[348],{"type":23,"value":349},"1、数据准备（create_label_dataset.py）",{"type":17,"tag":38,"props":351,"children":352},{},[353],{"type":23,"value":354},"采用THUCNews（清华大学中文新闻数据集）作为原始语料库，保障了数据的规模、多样性和高质量，并利用大模型对更大规模的新闻进行高效率的情感标注（推荐使用deepseek的api）。",{"type":17,"tag":38,"props":356,"children":357},{},[358],{"type":17,"tag":107,"props":359,"children":361},{"alt":7,"src":360},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/d4caf6edba494d24bec5fa75edf2916d.png",[],{"type":17,"tag":38,"props":363,"children":364},{},[365],{"type":23,"value":366},"下图为数据标注的代码实现，如果开发者准备了其他领域的数据集，也可参考下方逻辑，自行设计prompt和模型解码的超参。因为我们的目标是生成简短的情感分类标签，所以我们将max_tokens设置成5，限制输出长度，使模型只输出分类结果，减少意外输出；并将temperature设置成0，让输出更确定。",{"type":17,"tag":38,"props":368,"children":369},{},[370],{"type":17,"tag":107,"props":371,"children":373},{"alt":7,"src":372},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/f05d262019a84779953fe1eaef997022.png",[],{"type":17,"tag":38,"props":375,"children":376},{},[377],{"type":23,"value":378},"2、新闻检索功能实现（main.py）",{"type":17,"tag":38,"props":380,"children":381},{},[382],{"type":23,"value":383},"为了优化流程，方便用户自主搜索感兴趣的新闻，我们通过以下代码实现了根据用户输入的文字自动检索相关新闻，并支持AI相关新闻的重点搜索。例如，用户输入关键词“华为”后，可以进一步选择是否聚焦与“华为+AI”相关的新闻（建议使用天行数据API进行检索）。",{"type":17,"tag":38,"props":385,"children":386},{}
,[387],{"type":17,"tag":107,"props":388,"children":390},{"alt":7,"src":389},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/33d93769d57a44b0937597dba813209b.png",[],{"type":17,"tag":38,"props":392,"children":393},{},[394],{"type":23,"value":395},"3、模型微调（ft_deepseek_distill.py）",{"type":17,"tag":38,"props":397,"children":398},{},[399],{"type":23,"value":400},"在模型微调部分我们选择了LoRA微调，通过向特定层注入低秩矩阵来实现参数更新，从而节省计算资源和内存。",{"type":17,"tag":38,"props":402,"children":403},{},[404],{"type":23,"value":405},"如下是预训练模型实例化的代码，考虑到算力资源和显存占用，我们选择了一个轻量级的中文模型，如果开发者们有其他的偏好，可以更改model_name进行不同模型的实例化，但需要注意：",{"type":17,"tag":76,"props":407,"children":408},{},[409,414],{"type":17,"tag":80,"props":410,"children":411},{},[412],{"type":23,"value":413},"确保模型的规格适合需求。",{"type":17,"tag":80,"props":415,"children":416},{},[417],{"type":23,"value":418},"通常情况下，模型的权重会默认从Hugging Face下载，但由于网络原因，下载可能不成功。建议开发者提前从国内平台下载权重，并在本地加载。",{"type":17,"tag":38,"props":420,"children":421},{},[422],{"type":17,"tag":107,"props":423,"children":425},{"alt":7,"src":424},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/66aad7fbeb5247ff893461379ec320e4.png",[],{"type":17,"tag":38,"props":427,"children":428},{},[429],{"type":23,"value":430},"下图是LoRA微调相关的超参配置，开发者可自行调整。参数说明：",{"type":17,"tag":76,"props":432,"children":433},{},[434,439,444,449],{"type":17,"tag":80,"props":435,"children":436},{},[437],{"type":23,"value":438},"r=8: 
r是LoRA矩阵的秩。它定义了我们下图里旁路适配器中的矩阵维度，矩阵A的维度将是d*r，矩阵B的维度将是r*d。",{"type":17,"tag":80,"props":440,"children":441},{},[442],{"type":23,"value":443},"lora_alpha=32：它是一个缩放参数，控制LoRA模块中权重更新的缩放比例。",{"type":17,"tag":80,"props":445,"children":446},{},[447],{"type":23,"value":448},"lora_dropout=0.1：表示在LoRA模块中以10%的概率随机“关闭”某些神经元连接，以减少过拟合的风险。",{"type":17,"tag":80,"props":450,"children":451},{},[452],{"type":23,"value":453},"target_modules定义了将适配器应用到模型中的哪些层，这里将其应用到了主要的权重层：Q，K，V，投影层O，还有前馈网络的线性层。",{"type":17,"tag":38,"props":455,"children":456},{},[457],{"type":17,"tag":107,"props":458,"children":460},{"alt":7,"src":459},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/57011089a72e4cfabd18bde31e888a14.png",[],{"type":17,"tag":38,"props":462,"children":463},{},[464],{"type":23,"value":465},"4、情感化语音生成（tts_module.py）",{"type":17,"tag":38,"props":467,"children":468},{},[469],{"type":23,"value":470},"我们使用了语音合成技术（Text-to-Speech, TTS）将书面文本转换成人类可以听懂的口头语言。通过调用API，根据情感设置不同的语速、语调等。如下是不同情感的参数参考，我们通过预先定义不同情感所对应的声学参数组合（如语速、语调、音量、发音人等），在调用API时动态传入这些参数，从而生成富含情感的语音。",{"type":17,"tag":38,"props":472,"children":473},{},[474],{"type":17,"tag":107,"props":475,"children":477},{"alt":7,"src":476},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/951ad5cc6560473a8e3b72fc1eb513d8.png",[],{"type":17,"tag":38,"props":479,"children":480},{},[481],{"type":23,"value":482},"在这里，我们一共调出了9种不同情感的语调。从调试经验来看，调高语调值（pit）可以模拟兴奋或愉快时的上扬语调，调高音量值（vol）则能模拟外向的情绪，而调整语速（spd）时需要平衡清晰度。同时，配置情感模型（emo）可以进一步提升效果。欢迎开发者们进行尝试，探索更多不同的语调情感。",{"type":17,"tag":38,"props":484,"children":485},{},[486],{"type":17,"tag":107,"props":487,"children":489},{"alt":7,"src":488},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/10/17/b99efa8267b84de3ba56d3b402bf0473.png",[],{"type":17,"tag":38,"props":491,"children":492},{},[493],{"type":23,"value":494},"通过该项目，我们深入了解了国产化AI开发的全流程闭环。我们基于昇思MindSpore框架微调了DeepSeek-R1的蒸馏模型，最终实现了各个交互环节的整合。 
在开发过程中，我们也在考虑，DeepSeek-R1-Distill-Qwen这个模型是否是最合适的选择。实际上，这个模型更侧重于文本生成任务，而对于相对简单的情感分类任务，是否会有点“杀鸡用牛刀”？相比之下，像BERT这样的模型可能会更合适一些。不过，这个项目实践本身是一次非常有价值的收获。 后续我们将进一步优化模型的微调，接入语音交互，实现语音输入和输出。同时，我们计划在香橙派上进行部署，探索端云结合或完全在端侧进行推理的方案。",{"title":7,"searchDepth":496,"depth":496,"links":497},4,[],"markdown","content:technology-blogs:zh:3856.md","content","technology-blogs/zh/3856.md","technology-blogs/zh/3856","md",1776506136202]