[{"data":1,"prerenderedAt":485},["ShallowReactive",2],{"content-query-3BOwURt9co":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":479,"_id":480,"_source":481,"_file":482,"_stem":483,"_extension":484},"\u002Ftechnology-blogs\u002Fzh\u002F2026-5-29","zh",false,"","在 MindSpore AKG Agent 中的 Harness 设计：面向长程任务的状态机方法","将约束以代码逻辑的形式实现在外层 harness 中，让 Agent 专注于推理、方向判断与代码修改，而流程控制与边界检查由 harness 保障。","2026-5-29","https:\u002F\u002Fobs-mindspore-file.obs.cn-north-4.myhuaweicloud.com\u002Ffile\u002F2024\u002F11\u002F28\u002F8e0e0150508a4c5ba4287fa3bec8ea3f.png","technology-blogs","技术解读",{"type":15,"children":16,"toc":465},"root",[17,25,31,36,47,52,57,62,67,74,79,84,109,114,119,126,131,137,142,147,154,159,172,177,184,189,194,216,221,226,231,236,249,254,260,265,272,277,282,300,305,310,316,321,326,332,337,342,360,365,378,383,406,412,417,422,427,433,438,445,450,455,460],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"在-mindspore-akg-agent-中的-harness-设计面向长程任务的状态机方法",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29],{"type":24,"value":30},"当 Agent 需要连续运行数十小时甚至上百小时，且无人实时监督时，仅靠提示词约束其流程、记忆与行为边界，几乎必然会在若干轮后偏离预期。这些约束写入系统消息，指令偏移的风险始终存在。",{"type":18,"tag":26,"props":32,"children":33},{},[34],{"type":24,"value":35},"更可靠的做法是：将约束以代码逻辑的形式实现在外层 harness 中，让 Agent 专注于推理、方向判断与代码修改，而流程控制与边界检查由 harness 保障。Anthropic 曾精辟地概括过纯提示词驱动的困境：",{"type":18,"tag":37,"props":38,"children":40},"div",{"style":39},"text-align: center;",[41],{"type":18,"tag":42,"props":43,"children":46},"img",{"src":44,"style":45,"alt":7},"\u002Fcategory\u002Finformation\u002Ftechnology-blogs\u002Fbanner\u002F2026-5-29\u002F1.jpg","display: block;margin: 0 auto;max-width:60%",[],{"type":18,"tag":26,"props":48,"children":49},{},[50],{"type":24,"value":51},"“这两种都不是良好的提示词范式：",{"type":18,"tag":26,"props":53,"children":54},{},[55],{"type":24,"value":56},"一种是用脆弱的硬编码 if-else 
过分详细地描述流程；",{"type":18,"tag":26,"props":58,"children":59},{},[60],{"type":24,"value":61},"另一种是提示词过于模糊或错误地预设了上下文。”",{"type":18,"tag":26,"props":63,"children":64},{},[65],{"type":24,"value":66},"何时赋予自由、何时划定边界？本文以 MindSpore AKG Agent 中的算子性能调优任务 Auto Research（自动研究循环）为场景，提出一种基于状态机的 harness 设计——在保留 Agent 自主性的前提下，让长程任务跑得稳、可追溯、可复现。",{"type":18,"tag":68,"props":69,"children":71},{"id":70},"_01-auto-research-与-agent-loop-结构",[72],{"type":24,"value":73},"01 Auto Research 与 Agent Loop 结构",{"type":18,"tag":26,"props":75,"children":76},{},[77],{"type":24,"value":78},"Auto Research 得名于 Karpathy 近期公开的一个原型项目，用于迭代优化其 nanochat（GPT-2 的简化教学实现）的模型与性能。任务设定为：人提供优化指标（如 validation loss）、待优化对象集合、版本控制工具（如 git）；其余循环由 Agent 执行。人定义目标和对象，Agent 尝试不同修改路径。该模式可推广至算子库性能调优、推理内核优化、数据配方搜索、提示词工程等场景。",{"type":18,"tag":26,"props":80,"children":81},{},[82],{"type":24,"value":83},"Auto Research 的核心难点在于保持循环流程的长期稳定：每轮 Agent 需基于当前代码与反馈自主决策下一步，同时必须防止以下行为：",{"type":18,"tag":85,"props":86,"children":87},"ul",{},[88,94,99,104],{"type":18,"tag":89,"props":90,"children":91},"li",{},[92],{"type":24,"value":93},"跳过评测环节",{"type":18,"tag":89,"props":95,"children":96},{},[97],{"type":24,"value":98},"修改验收标准",{"type":18,"tag":89,"props":100,"children":101},{},[102],{"type":24,"value":103},"遗忘已失败的方向",{"type":18,"tag":89,"props":105,"children":106},{},[107],{"type":24,"value":108},"在上下文压缩后丢失历史经验",{"type":18,"tag":26,"props":110,"children":111},{},[112],{"type":24,"value":113},"Agent 使用工具的常见模式是 ReAct（Reasoning-Action）交替循环。ReAct 在单步层面有效，但其线性结构无法自动完成“完成一轮后回到起点开始下一轮”的回环。Auto Research 恰恰需要这种回环。",{"type":18,"tag":26,"props":115,"children":116},{},[117],{"type":24,"value":118},"围绕 ReAct 外层的约束设计，常见两类方案：",{"type":18,"tag":37,"props":120,"children":121},{"style":39},[122],{"type":18,"tag":42,"props":123,"children":125},{"src":124,"style":45,"alt":7},"\u002Fcategory\u002Finformation\u002Ftechnology-blogs\u002Fbanner\u002F2026-5-29\u002F2.jpg",[],{"type":18,"tag":26,"props":127,"children":128},{},[129],{"type":24,"value":130},"Auto 
Research 的目标通常没有预设的终止阈值，同时要求过程可复现、可审计。上述任一方案均不充分。需要一种在端到端自驱与固定 workflow 之间可连续调节的 harness 设计。",{"type":18,"tag":68,"props":132,"children":134},{"id":133},"_02-状态机与-react-的分工",[135],{"type":24,"value":136},"02 状态机与 ReAct 的分工",{"type":18,"tag":26,"props":138,"children":139},{},[140],{"type":24,"value":141},"状态机位于固定流程与端到端自驱的中间地带。它不替代 Agent 做每步决策，仅规定：当前所处阶段、可转移的下一个阶段集合、需跨阶段保存的状态。",{"type":18,"tag":26,"props":143,"children":144},{},[145],{"type":24,"value":146},"阶段内部仍运行 ReAct：Agent 自主决定查阅哪些文件、调用哪些工具、如何修改代码、何时判定本阶段完成。状态机定义流程框架，Agent 在框架内执行 ReAct。",{"type":18,"tag":37,"props":148,"children":149},{"style":39},[150],{"type":18,"tag":42,"props":151,"children":153},{"src":152,"style":45,"alt":7},"\u002Fcategory\u002Finformation\u002Ftechnology-blogs\u002Fbanner\u002F2026-5-29\u002F3.jpg",[],{"type":18,"tag":26,"props":155,"children":156},{},[157],{"type":24,"value":158},"以算子优化任务为例，用户提供：reference.py（PyTorch实现，用作写法参考和精度验证）和kernel.py（triton-ascend 算子的初始实现）。ReAct Agent 在外层状态机控制下执行四阶段循环：plan → edit → eval → decision，逐轮调优 kernel。其中包含两类状态机：",{"type":18,"tag":85,"props":160,"children":161},{},[162,167],{"type":18,"tag":89,"props":163,"children":164},{},[165],{"type":24,"value":166},"Auto Research 主循环状态机：观察 → 修改 → 评测 → 回滚\u002F保留。",{"type":18,"tag":89,"props":168,"children":169},{},[170],{"type":24,"value":171},"PLAN 状态机：每次代码编辑对应一个落盘的 PLAN 项，将探索动作与反馈从对话上下文中剥离保存。",{"type":18,"tag":26,"props":173,"children":174},{},[175],{"type":24,"value":176},"连续失败时，框架自动挂起当前方向，启动一个上下文隔离的 Subagent 重新诊断并生成新方向。运行预算耗尽后输出总结报告。",{"type":18,"tag":37,"props":178,"children":179},{"style":39},[180],{"type":18,"tag":42,"props":181,"children":183},{"src":182,"style":45,"alt":7},"\u002Fcategory\u002Finformation\u002Ftechnology-blogs\u002Fbanner\u002F2026-5-29\u002F4.jpg",[],{"type":18,"tag":26,"props":185,"children":186},{},[187],{"type":24,"value":188},"在一个hybrid attention mask triton-ascend算子利用 Auto Research 迭代的过程中，外围框架实现了“保留性能最好的版本(KEEP)，回滚性能退化(DISCARD)、精度不符\u002F编译错误(FAIL) 
的版本”。图中曲线呈现的性能收益来自参数调整和写法改动，如：",{"type":18,"tag":26,"props":190,"children":191},{},[192],{"type":24,"value":193},"1.算法重排：合并 seed kernel 中的若干次连续访存，移除冗余条件分支；",{"type":18,"tag":26,"props":195,"children":196},{},[197,199,206,208,214],{"type":24,"value":198},"2.以 ",{"type":18,"tag":200,"props":201,"children":203},"code",{"className":202},[],[204],{"type":24,"value":205},"torch.ones + triu",{"type":24,"value":207}," 取代 ",{"type":18,"tag":200,"props":209,"children":211},{"className":210},[],[212],{"type":24,"value":213},"arange",{"type":24,"value":215}," 广播 ；",{"type":18,"tag":26,"props":217,"children":218},{},[219],{"type":24,"value":220},"3.去除冗余的 mask 构造。",{"type":18,"tag":26,"props":222,"children":223},{},[224],{"type":24,"value":225},"可见 Auto Research 在triton-ascend算子调优过程中，能够自行组合、探索各种可叠加的性能优化写法。",{"type":18,"tag":26,"props":227,"children":228},{},[229],{"type":24,"value":230},"在上述的 Auto Research 过程中，当 Agent 行为符合提示词描述时，状态机构成的约束几乎不被感知；约束实际发挥作用的时刻是 Agent 试图跳过计划、绕过评测、修改验收条件或提前结束时——harness 将流程拉回允许范围。",{"type":18,"tag":26,"props":232,"children":233},{},[234],{"type":24,"value":235},"该结构可推广至更一般的工作流：",{"type":18,"tag":85,"props":237,"children":238},{},[239,244],{"type":18,"tag":89,"props":240,"children":241},{},[242],{"type":24,"value":243},"对于端到端验证类任务，其目标仅在于最终输出是否通过验收，中间过程无需追溯。此时状态机可退化为单状态：进入后执行 ReAct 循环，输出通过验收则终止，否则继续运行。",{"type":18,"tag":89,"props":245,"children":246},{},[247],{"type":24,"value":248},"对于多阶段任务，则需为每个阶段明确定义交付物格式及阶段间的状态转移规则，Auto Research 即属此类。",{"type":18,"tag":26,"props":250,"children":251},{},[252],{"type":24,"value":253},"同一任务在不同审计要求下，harness 的形态存在显著差异。若要求全流程可审计，状态机应划分更细粒度，交付物的格式约束亦须更严格；若仅关注最终结果的有效性，一个简单的循环结构配合少量提示词即可满足需求。",{"type":18,"tag":68,"props":255,"children":257},{"id":256},"_03-上下文管理状态机带来的三个好处",[258],{"type":24,"value":259},"03 上下文管理：状态机带来的三个好处",{"type":18,"tag":26,"props":261,"children":262},{},[263],{"type":24,"value":264},"状态机的另一个重要作用，是把上下文管理从 Agent 
自己维护的内容里独立出来一部分。以下是三个与上下文管理相关的设计。",{"type":18,"tag":266,"props":267,"children":269},{"id":268},"_31-落盘的-plan-文件",[270],{"type":24,"value":271},"3.1 落盘的 PLAN 文件",{"type":18,"tag":26,"props":273,"children":274},{},[275],{"type":24,"value":276},"Auto Research 本质上是枚举式探索：将已有任务资料、性能观察、论文方法、社区经验和模型知识重组并逐一验证。每一次尝试对应一个优化方向，每一次评测给出判断。有效探索的前提是：已失败的方向不再重复，未尝试的方向不被遗漏。这要求 Agent 长期保持对探索历史与结果的清晰认知。",{"type":18,"tag":26,"props":278,"children":279},{},[280],{"type":24,"value":281},"仅依赖对话历史难以满足：",{"type":18,"tag":85,"props":283,"children":284},{},[285,290,295],{"type":18,"tag":89,"props":286,"children":287},{},[288],{"type":24,"value":289},"历史迅速膨胀超出上下文窗口；",{"type":18,"tag":89,"props":291,"children":292},{},[293],{"type":24,"value":294},"\u002Fcompact 压缩后细节损失严重；",{"type":18,"tag":89,"props":296,"children":297},{},[298],{"type":24,"value":299},"即使未溢出，Agent 决策需要的是简短结论，而非原始日志和完整代码变动。",{"type":18,"tag":26,"props":301,"children":302},{},[303],{"type":24,"value":304},"PLAN 文件仅记录可复用、可比较的结论，如方向尝试、性能指标、失败原因摘要；原始评测输出、代码细节、重复的中间过程不进入 PLAN 文件，也不进入 Agent 上下文。这种记录方式比长度驱动的自动压缩更适配 Auto Research。",{"type":18,"tag":26,"props":306,"children":307},{},[308],{"type":24,"value":309},"Agent 对长上下文的依赖因此降低：上下文被压缩或从断点恢复时，仅需读取 PLAN 文件即可恢复历史信息。一个昇腾算子优化任务可直接从 PLAN 文件与 git 历史等落盘产物中在新会话内重建，无需依赖底层 ReAct Agent 的上下文管理能力。",{"type":18,"tag":266,"props":311,"children":313},{"id":312},"_32-subagent-的使用",[314],{"type":24,"value":315},"3.2 Subagent 的使用",{"type":18,"tag":26,"props":317,"children":318},{},[319],{"type":24,"value":320},"Subagent 在此设计中作为上下文隔离工具，用于两类场景：\n1、主 Agent 累积困境时切换视角：连续失败后，主 Agent 上下文被相似失败推理充斥，易陷入局部最优。启动一个零上下文的 Subagent 重新判断方向，比在原上下文内挣扎更有效。\n2、处理与主线无关的复杂子任务：如跨文件代码定位、多步检索。Subagent 完成后再返回端到端结论，避免挤占主 Agent 上下文。",{"type":18,"tag":26,"props":322,"children":323},{},[324],{"type":24,"value":325},"核心结论：多 Agent 拆解并不自动优于单 Agent。对于需要单一连贯输出的任务（如代码修改），多个 Agent 并行修改同一对象易导致风格冲突与重复失败。Auto Research 的主体执行始终由单一主 Agent 推进，仅在主 Agent 反复失败、需要新视角时才由 Subagent 
临时介入。",{"type":18,"tag":266,"props":327,"children":329},{"id":328},"_33-端到端评测托管",[330],{"type":24,"value":331},"3.3 端到端评测托管",{"type":18,"tag":26,"props":333,"children":334},{},[335],{"type":24,"value":336},"Harness 设计应使 Agent 的 ReAct 动作集中于与任务直接相关的活动。以算子优化为例，Agent 最应进行两类操作：理解性能相关的资料、编写代码改动；这两者的产出直接服务于下一步判断。",{"type":18,"tag":26,"props":338,"children":339},{},[340],{"type":24,"value":341},"除此之外，每轮运行还有大量必须执行但无需推理的动作：",{"type":18,"tag":85,"props":343,"children":344},{},[345,350,355],{"type":18,"tag":89,"props":346,"children":347},{},[348],{"type":24,"value":349},"运行评测、解析结果",{"type":18,"tag":89,"props":351,"children":352},{},[353],{"type":24,"value":354},"判定改动优劣、保留或回滚版本",{"type":18,"tag":89,"props":356,"children":357},{},[358],{"type":24,"value":359},"等待工具输出、检查必要条件",{"type":18,"tag":26,"props":361,"children":362},{},[363],{"type":24,"value":364},"这些动作的触发条件和行为可预先确定，应由 harness 执行。若交由 Agent 执行，会产生两个问题：",{"type":18,"tag":85,"props":366,"children":367},{},[368,373],{"type":18,"tag":89,"props":369,"children":370},{},[371],{"type":24,"value":372},"这些动作的输出占用上下文，挤占真正有价值的性能信息",{"type":18,"tag":89,"props":374,"children":375},{},[376],{"type":24,"value":377},"Agent 有时会为了推进任务而修改这些步骤本身（如放宽验收容差、跳过检查或调整参照实现）",{"type":18,"tag":26,"props":379,"children":380},{},[381],{"type":24,"value":382},"将上述步骤交由 harness 后：",{"type":18,"tag":85,"props":384,"children":385},{},[386,391,396,401],{"type":18,"tag":89,"props":387,"children":388},{},[389],{"type":24,"value":390},"Agent 无法修改这些步骤，也无需为其分配上下文；",{"type":18,"tag":89,"props":392,"children":393},{},[394],{"type":24,"value":395},"Harness 负责回滚错误改动、保留最佳版本，Agent 每轮开始时均面对已验证的最佳代码状态；",{"type":18,"tag":89,"props":397,"children":398},{},[399],{"type":24,"value":400},"每轮问题的结构保持一致，Agent 无需额外上下文处理历史遗留；",{"type":18,"tag":89,"props":402,"children":403},{},[404],{"type":24,"value":405},"Harness 输出格式相对固定，可提高 LLM 调用的 KV Cache 命中率，降低 API 开销；",{"type":18,"tag":68,"props":407,"children":409},{"id":408},"_04-总-结",[410],{"type":24,"value":411},"04 总 
结",{"type":18,"tag":26,"props":413,"children":414},{},[415],{"type":24,"value":416},"本文讨论了在保留 Agent 自主性的前提下，通过 harness 层面约束使 Agent 工作流更加可控的方案。",{"type":18,"tag":26,"props":418,"children":419},{},[420],{"type":24,"value":421},"Agent 的编排本身可视为一种新的编程范式：传统编程是设计确定性的、可预期产出的代码，使其主动执行；Agent 的自主性打开了确定性之外的空间，编程工作的重心相应地转移为约束 Agent 动作的可能空间。",{"type":18,"tag":26,"props":423,"children":424},{},[425],{"type":24,"value":426},"文中基于状态机的 harness 设计，正是这一新范式下一种可调节且对 Agent 行为亲和的具体实现方式。",{"type":18,"tag":68,"props":428,"children":430},{"id":429},"_05-社区与互动",[431],{"type":24,"value":432},"05 社区与互动",{"type":18,"tag":26,"props":434,"children":435},{},[436],{"type":24,"value":437},"昇思MindSpore AKG是面向多硬件的图算协同加速器组件，致力于持续引领业界先进算子编译生成技术，助力开源人工智能生态繁荣发展。我们始终秉持开源开放的合作理念，持续欢迎开发者提交Issue、贡献代码或分享迁移经验。无论您是企业用户还是个人开发者，都可以通过直接访问官方代码仓参与共建或加入AKG SIG参与交流。",{"type":18,"tag":37,"props":439,"children":440},{"style":39},[441],{"type":18,"tag":42,"props":442,"children":444},{"src":443,"style":45,"alt":7},"\u002Fcategory\u002Finformation\u002Ftechnology-blogs\u002Fbanner\u002F2026-5-29\u002F5.jpg",[],{"type":18,"tag":26,"props":446,"children":447},{},[448],{"type":24,"value":449},"欢迎您通过AtomGit 
Issues来提交问题、报告与建议。",{"type":18,"tag":26,"props":451,"children":452},{},[453],{"type":24,"value":454},"欢迎您通过社区论坛进行技术、问题交流。",{"type":18,"tag":26,"props":456,"children":457},{},[458],{"type":24,"value":459},"欢迎您通过Sig来管理和改善工作流程，参与讨论。",{"type":18,"tag":26,"props":461,"children":462},{},[463],{"type":24,"value":464},"让我们共同推动图算协同加速器的创新生态繁荣，让更多开发者受益于高效、易用的昇腾图算优化方案。",{"title":7,"searchDepth":466,"depth":466,"links":467},4,[468,470,471,477,478],{"id":70,"depth":469,"text":73},2,{"id":133,"depth":469,"text":136},{"id":256,"depth":469,"text":259,"children":472},[473,475,476],{"id":268,"depth":474,"text":271},3,{"id":312,"depth":474,"text":315},{"id":328,"depth":474,"text":331},{"id":408,"depth":469,"text":411},{"id":429,"depth":469,"text":432},"markdown","content:technology-blogs:zh:2026-5-29.md","content","technology-blogs\u002Fzh\u002F2026-5-29.md","technology-blogs\u002Fzh\u002F2026-5-29","md",1781776942224]