[{"data":1,"prerenderedAt":358},["ShallowReactive",2],{"content-query-bBpo2wrFji":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":352,"_id":353,"_source":354,"_file":355,"_stem":356,"_extension":357},"/technology-blogs/zh/3628","zh",false,"","文生视频SOTA模型推理开箱即用！MindSpore支持Step-Video-T2V","该版本代码开源，同时完成模型推理支持开箱即用","2025-02-26","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/28/d7e92d9d52e04f298eeec994f4bec799.png","technology-blogs","实践",{"type":15,"children":16,"toc":349},"root",[17,25,34,39,55,75,83,88,98,105,120,149,154,164,178,183,188,193,198,202,211,220,225,234,243,248,256,263,268,273,281,286,294,302,307,314,321,326,331],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"文生视频sota模型推理开箱即用mindspore支持step-video-t2v",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29],{"type":18,"tag":30,"props":31,"children":33},"img",{"alt":7,"src":32},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/28/8b093e334d6348b4ac9725fd5ddbcaaf.gif",[],{"type":18,"tag":26,"props":35,"children":36},{},[37],{"type":24,"value":38},"开发者基于昇思MindSpore AI框架实现了对阶跃星辰 (stepfun-ai ) 开源的 SOTA文生视频模型Step-Video-T2V支持，并将该版本代码开源，同时完成模型推理支持开箱即用。",{"type":18,"tag":26,"props":40,"children":41},{},[42,44,53],{"type":24,"value":43},"Step-Video-T2V 具有 30B 参数，能够生成204 帧 544p的高质量视频。为了提高效率，stepfun-ai 提出了一种用于视频的深度压缩 VAE，实现了 16x16 空间和 8 倍时间压缩比，并在最后阶段应用 Direct Preference Optimization (DPO) 进一步提高生成视频的视觉质量。Step-Video-T2V 在文生视频质量评估 benchmark 
Step-Video-T2V-Eval（",{"type":18,"tag":45,"props":46,"children":50},"a",{"href":47,"rel":48},"https://github.com/stepfun-ai/Step-Video-T2V/blob/main/benchmark/Step-Video-T2V-Eval",[49],"nofollow",[51],{"type":24,"value":52},"https://github.com/stepfun-ai/Step-Video-T2V/blob/main/benchmark/Step-Video-T2V-Eval）上，相比其他开源和/或商业模型展示出",{"type":24,"value":54}," SoTA 能力。",{"type":18,"tag":26,"props":56,"children":57},{},[58,60,66,68,73],{"type":24,"value":59},"本文将介绍基于",{"type":18,"tag":61,"props":62,"children":63},"strong",{},[64],{"type":24,"value":65},"昇思 MindSpore +单机",{"type":24,"value":67},"Atlas 800T A2使用",{"type":18,"tag":61,"props":69,"children":70},{},[71],{"type":24,"value":72},"Step-Video-T2V",{"type":24,"value":74},"的流程。",{"type":18,"tag":26,"props":76,"children":77},{},[78],{"type":18,"tag":61,"props":79,"children":80},{},[81],{"type":24,"value":82},"开源链接",{"type":18,"tag":26,"props":84,"children":85},{},[86],{"type":24,"value":87},"MindSpore 版 Step-Video-T2V 开源链接：",{"type":18,"tag":26,"props":89,"children":90},{},[91],{"type":18,"tag":45,"props":92,"children":95},{"href":93,"rel":94},"https://github.com/mindspore-lab/mindone/tree/master/examples/step_video_t2v",[49],[96],{"type":24,"value":97},"https://github.com/mindspore-lab/mindone/tree/master/examples/step_video_t2v",{"type":18,"tag":26,"props":99,"children":100},{},[101],{"type":18,"tag":30,"props":102,"children":104},{"alt":7,"src":103},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/28/0aeb13ccc0c6479c830b4f47711c8d7c.png",[],{"type":18,"tag":26,"props":106,"children":107},{},[108,113,115],{"type":18,"tag":61,"props":109,"children":110},{},[111],{"type":24,"value":112},"01",{"type":24,"value":114}," 
",{"type":18,"tag":61,"props":116,"children":117},{},[118],{"type":24,"value":119},"环境准备",{"type":18,"tag":121,"props":122,"children":123},"ul",{},[124,137],{"type":18,"tag":125,"props":126,"children":127},"li",{},[128,130],{"type":24,"value":129},"Mindspore 2.5.0 + CANN 8.0.0.beta1（",{"type":18,"tag":45,"props":131,"children":134},{"href":132,"rel":133},"https://www.mindspore.cn/install%EF%BC%89",[49],[135],{"type":24,"value":136},"https://www.mindspore.cn/install）",{"type":18,"tag":125,"props":138,"children":139},{},[140,142],{"type":24,"value":141},"MindSpore ONE开源仓（支持 diffusers 等SOTA生成式模型",{"type":18,"tag":45,"props":143,"children":146},{"href":144,"rel":145},"https://github.com/mindspore-lab/mindone%EF%BC%89",[49],[147],{"type":24,"value":148},"https://github.com/mindspore-lab/mindone）",{"type":18,"tag":26,"props":150,"children":151},{},[152],{"type":24,"value":153},"运行以下命令安装依赖：",{"type":18,"tag":155,"props":156,"children":158},"pre",{"code":157},"\ngit clone https://github.com/mindspore-lab/mindone.git\n\n# install mindone\ncd mindone\npip install -e .\n\n# install stepvideo\ncd examples/step_video_t2v\npip install -e .\n",[159],{"type":18,"tag":160,"props":161,"children":162},"code",{"__ignoreMap":7},[163],{"type":24,"value":157},{"type":18,"tag":26,"props":165,"children":166},{},[167,172,173],{"type":18,"tag":61,"props":168,"children":169},{},[170],{"type":24,"value":171},"02",{"type":24,"value":114},{"type":18,"tag":61,"props":174,"children":175},{},[176],{"type":24,"value":177},"权重准备",{"type":18,"tag":26,"props":179,"children":180},{},[181],{"type":24,"value":182},"权重可从以下链接获取，按需选择原始模型或蒸馏模型，下载后放到 
/path_to/stepfun-ai/stepvideo-t2v/目录。",{"type":18,"tag":26,"props":184,"children":185},{},[186],{"type":24,"value":187},"Models",{"type":18,"tag":26,"props":189,"children":190},{},[191],{"type":24,"value":192},"Huggingface",{"type":18,"tag":26,"props":194,"children":195},{},[196],{"type":24,"value":197},"Modelscope",{"type":18,"tag":26,"props":199,"children":200},{},[201],{"type":24,"value":72},{"type":18,"tag":26,"props":203,"children":204},{},[205],{"type":18,"tag":45,"props":206,"children":209},{"href":207,"rel":208},"https://huggingface.co/stepfun-ai/stepvideo-t2v",[49],[210],{"type":24,"value":207},{"type":18,"tag":26,"props":212,"children":213},{},[214],{"type":18,"tag":45,"props":215,"children":218},{"href":216,"rel":217},"https://www.modelscope.cn/models/stepfun-ai/stepvideo-t2v",[49],[219],{"type":24,"value":216},{"type":18,"tag":26,"props":221,"children":222},{},[223],{"type":24,"value":224},"Step-Video-T2V-Turbo (Inference Step Distillation)",{"type":18,"tag":26,"props":226,"children":227},{},[228],{"type":18,"tag":45,"props":229,"children":232},{"href":230,"rel":231},"https://huggingface.co/stepfun-ai/stepvideo-t2v-turbo",[49],[233],{"type":24,"value":230},{"type":18,"tag":26,"props":235,"children":236},{},[237],{"type":18,"tag":45,"props":238,"children":241},{"href":239,"rel":240},"https://www.modelscope.cn/models/stepfun-ai/stepvideo-t2v-turbo",[49],[242],{"type":24,"value":239},{"type":18,"tag":26,"props":244,"children":245},{},[246],{"type":24,"value":247},"下载完成后，使用以下命令把 hunyuan-clip 模型权重从 bin 格式转为 safetensors格式。",{"type":18,"tag":155,"props":249,"children":251},{"code":250},"\npython convert.py \\\n    --pt_filename /path_to/stepfun-ai/stepvideo-t2v/hunyuan_clip/clip_text_encoder/pytorch_model.bin \\\n    --sf_filename /path_to/stepfun-ai/stepvideo-t2v/hunyuan_clip/clip_text_encoder/model.safetensors \\\n    --config_path 
/path_to/stepfun-ai/stepvideo-t2v/hunyuan_clip/clip_text_encoder/config.json\n",[252],{"type":18,"tag":160,"props":253,"children":254},{"__ignoreMap":7},[255],{"type":24,"value":250},{"type":18,"tag":26,"props":257,"children":258},{},[259],{"type":18,"tag":30,"props":260,"children":262},{"alt":7,"src":261},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/28/39f0be6ab1e94f3b97f17a2f667bdb7c.png",[],{"type":18,"tag":26,"props":264,"children":265},{},[266],{"type":24,"value":267},"我们采用了文本编码器、VAE 解码和 DiT 的解耦策略，以优化 DiT 对 NPU 的资源利用率。因此，除了推理的4卡，我们使用了额外的 NPU 来运行文本编码器嵌入计算、 VAE 解码的 API 服务。",{"type":18,"tag":26,"props":269,"children":270},{},[271],{"type":24,"value":272},"首先分别使用单卡启动 vae / captioner 服务，把返回的 url 地址传给后面推理启动命令，地址一般为 127.0.0.1。",{"type":18,"tag":155,"props":274,"children":276},{"code":275},"\nmodel_dir='/path_to/stepfun-ai/stepvideo-t2v/'\n\n# (1) start vae/captioner server on single-card\n# !!! This command will return the URL for both the caption API and the VAE API. Please use the returned URL in the following command.\nASCEND_RT_VISIBLE_DEVICES=0 python api/call_remote_server.py --model_dir $model_dir --enable_vae True &\nASCEND_RT_VISIBLE_DEVICES=1 python api/call_remote_server.py --model_dir $model_dir --enable_llm True &\n# !!! 
wait...a moment, vae/llm is loading…\n",[277],{"type":18,"tag":160,"props":278,"children":279},{"__ignoreMap":7},[280],{"type":24,"value":275},{"type":18,"tag":26,"props":282,"children":283},{},[284],{"type":24,"value":285},"vae / captioner 服务加载完成后，另起 4卡 启动推理：",{"type":18,"tag":155,"props":287,"children":289},{"code":288},"\n# (2) setting and replace the `url` from before command print\nparallel=4\nsp=2\npp=2\nvae_url='127.0.0.1'\ncaption_url='127.0.0.1'\n",[290],{"type":18,"tag":160,"props":291,"children":292},{"__ignoreMap":7},[293],{"type":24,"value":288},{"type":18,"tag":155,"props":295,"children":297},{"code":296},"\n# (3) run parallel dit model on 4-cards \nASCEND_RT_VISIBLE_DEVICES=2,3,4,5 msrun --bind_core=True --worker_num=$parallel --local_worker_num=$parallel --master_port=9000 --log_dir=outputs/parallel_logs python -u \\\nrun_parallel.py \\\n    --model_dir $model_dir \\\n    --vae_url $vae_url \\\n    --caption_url $caption_url  \\\n    --ulysses_degree $sp \\\n    --pp_degree $pp \\\n    --prompt \"一名宇航员在月球上发现一块石碑，上面印有“stepfun”字样，闪闪发光\"\\\n  --infer_steps 50  \\\n  --cfg_scale 9.0 \\\n  --time_shift 13.0 \\\n  --num_frames 204 \\\n  --height 544 \\\n  --width 992\n",[298],{"type":18,"tag":160,"props":299,"children":300},{"__ignoreMap":7},[301],{"type":24,"value":296},{"type":18,"tag":26,"props":303,"children":304},{},[305],{"type":24,"value":306},"以下是推荐的参数配置，可达到比较好的推理效果：",{"type":18,"tag":26,"props":308,"children":309},{},[310],{"type":18,"tag":30,"props":311,"children":313},{"alt":7,"src":312},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/28/624f3a861e6c46be8417870afcd183a3.png",[],{"type":18,"tag":26,"props":315,"children":316},{},[317],{"type":18,"tag":30,"props":318,"children":320},{"alt":7,"src":319},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/28/ec5a767dc2aa4f2f98ed7962b54f30e4.png",[],{"type":18,"tag":26,"props":322,"children":323},{},[324],{"type":24,"value":325},"prompt: 
\"一名宇航员在月球上发现一块石碑，上面印有“stepfun”字样，闪闪发光\"",{"type":18,"tag":26,"props":327,"children":328},{},[329],{"type":24,"value":330},"height/width/frame: 544px × 992px × 204f",{"type":18,"tag":26,"props":332,"children":333},{},[334,338,340,347],{"type":18,"tag":30,"props":335,"children":337},{"alt":7,"src":336},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/28/48a89fdd6e4f4deaaf7597bded7519b8.png",[],{"type":24,"value":339}," （",{"type":18,"tag":45,"props":341,"children":344},{"href":342,"rel":343},"https://mp.weixin.qq.com/s/sKf9I8iQJow0OnNjgXinMw",[49],[345],{"type":24,"value":346},"视频观看链接",{"type":24,"value":348},"）",{"title":7,"searchDepth":350,"depth":350,"links":351},4,[],"markdown","content:technology-blogs:zh:3628.md","content","technology-blogs/zh/3628.md","technology-blogs/zh/3628","md",1776506132411]