[{"data":1,"prerenderedAt":404},["ShallowReactive",2],{"content-query-B49cNxN37O":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":398,"_id":399,"_source":400,"_file":401,"_stem":402,"_extension":403},"/news/zh/2025-12-15","zh",false,"","昇思人工智能框架峰会 | 基于MindSpore NLP玩转DeepSeek-OCR的开发实践，解锁文本压缩新范式","AI模型正重新定义信息压缩的边界","2025-12-15","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/07/25/199b735845bf4106b44b2035dc97bd39.png","news",{"type":14,"children":15,"toc":387},"root",[16,24,34,42,47,55,63,68,73,85,95,104,109,117,125,130,141,146,151,156,176,181,188,219,224,231,239,250,255,260,269,274,287,296,301,329,334,344,349,354,361,369,377,382],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"昇思人工智能框架峰会-基于mindspore-nlp玩转deepseek-ocr的开发实践解锁文本压缩新范式",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":17,"tag":29,"props":30,"children":31},"strong",{},[32],{"type":23,"value":33},"# 01",{"type":17,"tag":25,"props":35,"children":36},{},[37],{"type":17,"tag":29,"props":38,"children":39},{},[40],{"type":23,"value":41},"当文本遇见视觉，AI模型正重新定义信息压缩的边界",{"type":17,"tag":25,"props":43,"children":44},{},[45],{"type":23,"value":46},"在人工智能快速发展的今天，DeepSeek团队于2025年10月推出的DeepSeek-OCR模型带来了一场文本处理范式的革命。这一创新模型不仅实现了10倍压缩率下97%的解码精度，更探索了通过视觉模态压缩长上下文的全新路径。而昇思MindSpore框架的day0支持能力，则为这一前沿技术的快速部署应用提供了坚实基础。",{"type":17,"tag":25,"props":48,"children":49},{},[50],{"type":17,"tag":29,"props":51,"children":52},{},[53],{"type":23,"value":54},"# 02",{"type":17,"tag":25,"props":56,"children":57},{},[58],{"type":17,"tag":29,"props":59,"children":60},{},[61],{"type":23,"value":62},"DeepSeek-OCR：重新定义文本压缩的边界",{"type":17,"tag":25,"props":64,"children":65},{},[66],{"type":23,"value":67},"DeepSeek-OCR 是 DeepSeek AI 于 2025 年 10 月 发布的多模态模型，以探索视觉 - 文本压缩边界为核心目标，为文档识别、图像转文本提供创新方案。其采用 DeepEncoder 视觉编码器与 DeepSeek3B-MoE-A570M 混合专家解码器的双模块架构，从 LLM 视角重新定义视觉编码器功能，聚焦 “文档解码所需最少视觉 token” 这一核心问题，对研究 “一图胜千言” 原理具有重要意义。",{"type":17,"tag":25,"props":69,"children":70},{},[71],{"type":23,"value":72},"模型的核心技术突破体现在三个方面：",{"type":17,"tag":74,"props":75,"children":77},"h3",{"id":76},"高压缩比下的精度保持实验表明当文本令牌数量在视觉令牌数量的10倍以内即压缩比10倍时模型可以实现97的解码精度即使在20倍压缩率下仍保有约60准确率",[78,83],{"type":17,"tag":29,"props":79,"children":80},{},[81],{"type":23,"value":82},"高压缩比下的精度保持：****实验表明，当文本令牌数量在视觉令牌数量的",{"type":23,"value":84},"10倍以内（即压缩比\u003C10倍）时，模型可以实现97%的解码精度，即使在20倍压缩率下仍保有约60%准确率。",{"type":17,"tag":86,"props":87,"children":89},"h2",{"id":88},"分层视觉编码设计deepencoder采用三阶段处理流程首先使用sam-base进行局部感知窗口注意力看清细节然后通过卷积层进行16倍下采样最后使用clip-large进行全局语义理解这种设计能够在高分辨率输入下保持低激活内存",[90],{"type":17,"tag":29,"props":91,"children":92},{},[93],{"type":23,"value":94},"分层视觉编码设计：DeepEncoder采用三阶段处理流程——首先使用SAM-base进行局部感知（窗口注意力看清细节），然后通过卷积层进行16倍下采样，最后使用CLIP-large进行全局语义理解。这种设计能够在高分辨率输入下保持低激活内存。",{"type":17,"tag":86,"props":96,"children":98},{"id":97},"多分辨率支持模型提供tinysmallbaselargegundam五种配置支持从512512到12801280的不同分辨率输入其中gundam版本专门针对大尺寸复杂文档优化",[99],{"type":17,"tag":29,"props":100,"children":101},{},[102],{"type":23,"value":103},"多分辨率支持：模型提供Tiny/Small/Base/Large/Gundam五种配置，支持从512×512到1280×1280的不同分辨率输入，其中Gundam版本专门针对大尺寸复杂文档优化。",{"type":17,"tag":25,"props":105,"children":106},{},[107],{"type":23,"value":108},"在实际性能方面，DeepSeek-OCR在OmniDocBench测试中表现卓越，仅使用100个视觉token即超越GOT-OCR2.0模型，800个视觉token优于MinerU2.0模型。支持PDF转图像、批量处理及Markdown格式输出。",{"type":17,"tag":25,"props":110,"children":111},{},[112],{"type":17,"tag":29,"props":113,"children":114},{},[115],{"type":23,"value":116},"# 03",{"type":17,"tag":25,"props":118,"children":119},{},[120],{"type":17,"tag":29,"props":121,"children":122},{},[123],{"type":23,"value":124},"Day0支持：MindSpore NLP快速支持DeepSeek-OCR",{"type":17,"tag":25,"props":126,"children":127},{},[128],{"type":23,"value":129},"MindSpore NLP作为基于昇思MindSpore的开源NLP库，**其核心优势在于与Hugging Face生态的全面兼容。**这种兼容性设计使得任何基于Transformers架构的模型都能在昇思MindSpore框架上无缝运行，为DeepSeek-OCR的快速部署提供了技术基础。",{"type":17,"tag":131,"props":132,"children":134},"div",{"style":133},"text-align: center;",[135],{"type":17,"tag":136,"props":137,"children":140},"img",{"src":138,"style":139,"alt":7},"/category/information/news/banner/2025-12-15-1.jpg","display: block;margin: 0 auto;max-width:60%",[],{"type":17,"tag":25,"props":142,"children":143},{},[144],{"type":23,"value":145},"新增2行代码，即可实现基于昇思MindSpore的一键适配",{"type":17,"tag":25,"props":147,"children":148},{},[149],{"type":23,"value":150},"具体而言，MindSpore NLP提供了与Hugging Face完全一致的API接口，开发者可以使用熟悉的AutoModel、AutoTokenizer等类直接加载和运行模型。这种设计极大降低了模型迁移的技术门槛，确保新发布的模型能够实现“day0”支持。",{"type":17,"tag":25,"props":152,"children":153},{},[154],{"type":23,"value":155},"基于MindSpore NLP的兼容性特性，DeepSeek-OCR在昇思MindSpore上的部署变得异常简洁。整个过程主要包含三个关键步骤：",{"type":17,"tag":157,"props":158,"children":159},"ul",{},[160,166,171],{"type":17,"tag":161,"props":162,"children":163},"li",{},[164],{"type":23,"value":165},"环境配置：安装MindSpore NLP及相关依赖库，确保昇思MindSpore版本兼容性",{"type":17,"tag":161,"props":167,"children":168},{},[169],{"type":23,"value":170},"模型加载：使用MindSpore NLP+Transformers接口直接加载DeepSeek-OCR预训练权重",{"type":17,"tag":161,"props":172,"children":173},{},[174],{"type":23,"value":175},"推理执行：调用统一的API进行文档理解和视觉-文本压缩任务",{"type":17,"tag":25,"props":177,"children":178},{},[179],{"type":23,"value":180},"代码如下图所示：",{"type":17,"tag":131,"props":182,"children":183},{"style":133},[184],{"type":17,"tag":136,"props":185,"children":187},{"src":186,"style":139,"alt":7},"/category/information/news/banner/2025-12-15-2.jpg",[],{"type":17,"tag":25,"props":189,"children":190},{},[191,193,208,210],{"type":23,"value":192},"这种标准化流程消除了复杂的模型转换环节，使研究者能够专注于应用开发而非环境适配。无论是处理扫描文档、PDF转换还是长文本压缩，开发者都可以利用熟悉的Hugging Face编程习惯在昇思MindSpore生态中高效运行DeepSeek-OCR，",{"type":17,"tag":29,"props":194,"children":195},{},[196,198,206],{"type":23,"value":197},"完",{"type":17,"tag":29,"props":199,"children":200},{},[201],{"type":17,"tag":29,"props":202,"children":203},{},[204],{"type":23,"value":205},"整",{"type":23,"value":207},"案例",{"type":23,"value":209},"详见：（",{"type":17,"tag":211,"props":212,"children":216},"a",{"href":213,"rel":214},"https://github.com/mindspore-lab/mindnlp/tree/master/examples/transformers/inference/deepseek-ocr%EF%BC%89%E3%80%82",[215],"nofollow",[217],{"type":23,"value":218},"https://github.com/mindspore-lab/mindnlp/tree/master/examples/transformers/inference/deepseek-ocr）。",{"type":17,"tag":25,"props":220,"children":221},{},[222],{"type":23,"value":223},"如下图所示，运行脚本后，模型可识别扫描件中的文字，并转换为MarkDown文件。",{"type":17,"tag":131,"props":225,"children":226},{"style":133},[227],{"type":17,"tag":136,"props":228,"children":230},{"src":229,"style":139,"alt":7},"/category/information/news/banner/2025-12-15-3.jpg",[],{"type":17,"tag":25,"props":232,"children":233},{},[234],{"type":17,"tag":29,"props":235,"children":236},{},[237],{"type":23,"value":238},"# 04",{"type":17,"tag":25,"props":240,"children":241},{},[242],{"type":17,"tag":29,"props":243,"children":244},{},[245],{"type":17,"tag":29,"props":246,"children":247},{},[248],{"type":23,"value":249},"基于Expert合并的小MoE模型加速：权重融合计算优化策略",{"type":17,"tag":25,"props":251,"children":252},{},[253],{"type":23,"value":254},"DeepSeek-OCR的解码器采用混合专家（MoE）架构，激活参数约570M。针对MoE模型训练中的性能挑战，昇思MindSpore提供了基于Expert合并的优化方案，显著提升了小MoE模型的效率。",{"type":17,"tag":25,"props":256,"children":257},{},[258],{"type":23,"value":259},"基于Expert合并的小MoE模型加速技术核心在于通过权重预融合策略，将传统动态路由计算转化为统一计算流，从根本上解决MoE架构中的Host端调度瓶颈问题。",{"type":17,"tag":86,"props":261,"children":263},{"id":262},"_1传统moe计算瓶颈分析",[264],{"type":17,"tag":29,"props":265,"children":266},{},[267],{"type":23,"value":268},"1、传统MoE计算瓶颈分析",{"type":17,"tag":25,"props":270,"children":271},{},[272],{"type":23,"value":273},"传统MoE模型采用“专家视角”的计算模式，其核心瓶颈体现在两个方面：",{"type":17,"tag":157,"props":275,"children":276},{},[277,282],{"type":17,"tag":161,"props":278,"children":279},{},[280],{"type":23,"value":281},"细碎算子调度开销：传统实现方式需要遍历每个专家，为每个专家独立执行前向计算。这种循环遍历模式导致大量小规模算子的频繁调度，特别是当专家数量增多时，Host端的算子下发和调度开销呈线性增长。",{"type":17,"tag":161,"props":283,"children":284},{},[285],{"type":23,"value":286},"负载不均衡问题：由于不同专家处理的token数量差异显著，计算过程中容易出现负载不均衡。某些热门专家需要处理大量token，而其他专家可能处于空闲状态，这种不均衡进一步加剧了设备利用率的下降。",{"type":17,"tag":86,"props":288,"children":290},{"id":289},"_2权重预融合技术原理",[291],{"type":17,"tag":29,"props":292,"children":293},{},[294],{"type":23,"value":295},"2、权重预融合技术原理",{"type":17,"tag":25,"props":297,"children":298},{},[299],{"type":23,"value":300},"基于Expert合并的加速方案通过FFN权重预融合技术，将多个专家的计算任务合并为单一计算流：",{"type":17,"tag":157,"props":302,"children":303},{},[304,324],{"type":17,"tag":161,"props":305,"children":306},{},[307,309,315,317,322],{"type":23,"value":308},"权重合并机制：在模型初始化阶段，将所有专家的FFN层权重进行拼接融合，形成一个统一的超大型权重矩阵。以8专家MoE层为例，每个专家FFN层的输入维度为d_model，中间维度为d_ffn，合并后的权重矩阵形状从8个独立的",{"type":17,"tag":310,"props":311,"children":312},"span",{},[313],{"type":23,"value":314},"d_model, d_ffn",{"type":23,"value":316},"矩阵转变为统一的",{"type":17,"tag":310,"props":318,"children":319},{},[320],{"type":23,"value":321},"8×d_model, d_ffn",{"type":23,"value":323},"矩阵。",{"type":17,"tag":161,"props":325,"children":326},{},[327],{"type":23,"value":328},"统一计算流程：路由网络输出的选择权重不再用于动态激活不同专家，而是作为加权系数直接应用于融合后的计算结果。具体而言，模型首先通过融合权重矩阵执行一次统一的前向计算，然后根据路由权重对输出进行加权组合，避免了传统的专家遍历过程。",{"type":17,"tag":25,"props":330,"children":331},{},[332],{"type":23,"value":333},"针对DeepSeekV2（DeepSeek-OCR LLM模块）的改进代码如下：",{"type":17,"tag":335,"props":336,"children":338},"pre",{"code":337},"def new_forward_for_moe(self, hidden_states):\n  batch_size, sequence_length, hidden_dim = hidden_states.shape\n  selected_experts, routing_weights = self.gate(hidden_states)\n  router_scores = torch.zeros(size=(batch_size * sequence_length, self.config.n_routed_experts), device=hidden_states.device, dtype=hidden_states.dtype)\n  # we cast back to the input dtype\n  routing_weights = routing_weights.to(hidden_states.dtype)\n  router_scores = torch.scatter_add(router_scores, -1, selected_experts, routing_weights)\n  hidden_states = hidden_states.view(-1, hidden_dim)\n  if self.config.n_shared_experts is not None:\n     shared_expert_output = self.shared_experts(hidden_states)\n    \n  hidden_w1 = torch.matmul(hidden_states, self.w1)\n  hidden_w3 = torch.matmul(hidden_states, self.w3)\n  hidden_states = self.act(hidden_w1) * hidden_w3\n  hidden_states = torch.bmm(hidden_states, self.w2) * torch.transpose(router_scores, 0, 1).unsqueeze(-1)\n  final_hidden_states = hidden_states.sum(dim=0, dtype=hidden_states.dtype)\n  if self.config.n_shared_experts is not None:\n     hidden_states = final_hidden_states + shared_expert_output\n  return hidden_states.view(batch_size, sequence_length, hidden_dim)\n",[339],{"type":17,"tag":340,"props":341,"children":342},"code",{"__ignoreMap":7},[343],{"type":23,"value":337},{"type":17,"tag":25,"props":345,"children":346},{},[347],{"type":23,"value":348},"在昇思MindSpore+昇腾的软硬件协同环境中，这一技术大幅提升了DeepSeek-OCR的执行速度，相较于原版实现，推理token生成的性能提升3-4x，算力利用率由8%提升至30%+。这种基于Expert合并的加速思路，为小规模MoE模型的部署提供了一种新的优化范式，特别是在对推理延迟敏感的端侧和应用场景中具有重要价值。",{"type":17,"tag":25,"props":350,"children":351},{},[352],{"type":23,"value":353},"优化后的推理效果如视频中所示：",{"type":17,"tag":131,"props":355,"children":356},{"style":133},[357],{"type":17,"tag":136,"props":358,"children":360},{"src":359,"style":139,"alt":7},"/category/information/news/banner/2025-12-15-4.jpg",[],{"type":17,"tag":25,"props":362,"children":363},{},[364],{"type":17,"tag":29,"props":365,"children":366},{},[367],{"type":23,"value":368},"# 05",{"type":17,"tag":25,"props":370,"children":371},{},[372],{"type":17,"tag":29,"props":373,"children":374},{},[375],{"type":23,"value":376},"总结",{"type":17,"tag":25,"props":378,"children":379},{},[380],{"type":23,"value":381},"DeepSeek-OCR与昇思MindSpore在昇腾硬件上的深度结合，标志着文档智能处理进入了一个全新的发展阶段。这一技术组合不仅展现了前沿AI模型的创新潜力，更体现了从算法、框架到硬件的全栈优化价值。",{"type":17,"tag":25,"props":383,"children":384},{},[385],{"type":23,"value":386},"展望未来，随着多模态大模型技术的持续演进和昇腾算力基础设施的不断完善，OCR模型与昇思MindSpore的深度结合将释放更大潜力。从简单的文档识别到复杂的知识抽取，从单页处理到跨文档分析，这一技术路径正在开启文档智能的新篇章，为企业数字化转型和AI普惠应用提供坚实的技术底座。",{"title":7,"searchDepth":388,"depth":388,"links":389},4,[390,393,395,396,397],{"id":76,"depth":391,"text":392},3,"高压缩比下的精度保持：****实验表明，当文本令牌数量在视觉令牌数量的10倍以内（即压缩比\u003C10倍）时，模型可以实现97%的解码精度，即使在20倍压缩率下仍保有约60%准确率。",{"id":88,"depth":394,"text":94},2,{"id":97,"depth":394,"text":103},{"id":262,"depth":394,"text":268},{"id":289,"depth":394,"text":295},"markdown","content:news:zh:2025-12-15.md","content","news/zh/2025-12-15.md","news/zh/2025-12-15","md",1776506059839]