[{"data":1,"prerenderedAt":397},["ShallowReactive",2],{"content-query-tCVl9AkQU8":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":391,"_id":392,"_source":393,"_file":394,"_stem":395,"_extension":396},"/technology-blogs/zh/3645","zh",false,"","基于昇腾+MindSpore，玩转HunyuanVideo-I2V图生视频！","介绍如何基于昇思MindSpore和Atlas 800T A2，完整实现HunyuanVideo-I2V从图像到视频生成的部署流程。","2025-03-14","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/5d7b6274e5a0443aa0b64e7c87204aa5.png","technology-blogs","实践",{"type":15,"children":16,"toc":383},"root",[17,25,31,44,57,68,77,113,122,127,136,141,148,153,158,163,173,178,183,191,196,205,215,222,256,265,273,282,289,297,302,310,319,324,329,334,342,351,356,363,372],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"基于昇腾mindspore玩转hunyuanvideo-i2v图生视频",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29],{"type":24,"value":30},"HunyuanVideo-I2V是腾讯混元系列模型中一个新型的图像到视频转换模型。用户只需要提供一张图片，简要描述要生成的视频内容，例如动作描述，镜头位置等等，HunyuanVideo-I2V就可以让图片动起来，生成高清连贯的视频，最高支持720P 5s。",{"type":18,"tag":26,"props":32,"children":33},{},[34,36,42],{"type":24,"value":35},"昇思MindSpore团队现已完成对HunyuanVideo-I2V的适配，并将其开源至",{"type":18,"tag":37,"props":38,"children":39},"strong",{},[40],{"type":24,"value":41},"MindSpore ONE",{"type":24,"value":43},"仓库，本文将要给大家详细介绍，如何基于昇思MindSpore和Atlas 800T A2，完整实现HunyuanVideo-I2V从图像到视频生成的部署流程。",{"type":18,"tag":26,"props":45,"children":46},{},[47,49],{"type":24,"value":48},"MindSpore 
ONE开源代码仓链接：",{"type":18,"tag":50,"props":51,"children":55},"a",{"href":52,"rel":53},"https://github.com/mindspore-lab/mindone/tree/master/examples/hunyuanvideo-i2v",[54],"nofollow",[56],{"type":24,"value":52},{"type":18,"tag":26,"props":58,"children":59},{},[60,62],{"type":24,"value":61},"魔乐社区代码仓链接：",{"type":18,"tag":50,"props":63,"children":66},{"href":64,"rel":65},"https://modelers.cn/models/MindSpore-Lab/hyvideo",[54],[67],{"type":24,"value":64},{"type":18,"tag":19,"props":69,"children":71},{"id":70},"_01效果展示",[72],{"type":18,"tag":37,"props":73,"children":74},{},[75],{"type":24,"value":76},"# 01****效果展示",{"type":18,"tag":26,"props":78,"children":79},{},[80,85,87,91,92,96,97,101,102,106,108],{"type":18,"tag":81,"props":82,"children":84},"img",{"alt":7,"src":83},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/61af4a73d2bb44c4aaa8e6cda03853b3.png",[],{"type":24,"value":86}," ",{"type":18,"tag":81,"props":88,"children":90},{"alt":7,"src":89},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/fb733455d2c04592afa2528a723a404f.png",[],{"type":24,"value":86},{"type":18,"tag":81,"props":93,"children":95},{"alt":7,"src":94},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/14b2445a04604fe7a5f036ba101d7101.png",[],{"type":24,"value":86},{"type":18,"tag":81,"props":98,"children":100},{"alt":7,"src":99},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/86911573206e4599ab4c76d1d8c8f843.png",[],{"type":24,"value":86},{"type":18,"tag":81,"props":103,"children":105},{"alt":7,"src":104},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/6ce6457438034827b9cf53216dd6c4e2.png",[],{"type":24,"value":107}," 
（",{"type":18,"tag":50,"props":109,"children":112},{"href":110,"rel":111},"https://mp.weixin.qq.com/s/9C_hATfyTO2ealZqdH4YpQ",[54],[],{"type":18,"tag":26,"props":114,"children":115},{},[116],{"type":18,"tag":50,"props":117,"children":119},{"href":110,"rel":118},[54],[120],{"type":24,"value":121},"点击链接进行观看",{"type":18,"tag":26,"props":123,"children":124},{},[125],{"type":24,"value":126},"）",{"type":18,"tag":19,"props":128,"children":130},{"id":129},"_02模型介绍",[131],{"type":18,"tag":37,"props":132,"children":133},{},[134],{"type":24,"value":135},"# 02****模型介绍",{"type":18,"tag":26,"props":137,"children":138},{},[139],{"type":24,"value":140},"HunyuanVideo-I2V和HunyuanVideo的模型结构相似，都采取了双文本编码器（Llava和CLIP）和单双流DiT Block，以实现对文本和图像多模态信息的融合。HunyuanVideo的模型结构如下图所示：",{"type":18,"tag":26,"props":142,"children":143},{},[144],{"type":18,"tag":81,"props":145,"children":147},{"alt":7,"src":146},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/e062380a3d934ac2a76e0f5bba677323.png",[],{"type":18,"tag":26,"props":149,"children":150},{},[151],{"type":24,"value":152},"HunyuanVideo-I2V的总blocks数量是60，其中单流block数量为40， 双流block数量为20，模型的参数量更是达到了130亿。因此，使用HunyuanVideo-I2V生成720P视频的峰值NPU内存比较高，达到了62GB。推荐使用昇腾Atlas 800T A2机器（64GB），以获得更好的性能。",{"type":18,"tag":26,"props":154,"children":155},{},[156],{"type":24,"value":157},"HunyuanVideo-I2V在HunyuanVideo (T2V)的基础上，将参考图像的潜在特征和高斯噪声进行拼接和融合，作为DiT模型的输入，在每一步去噪的前后进行首帧图像潜在特征的替换。这样不仅实现了对参考图像的一致性保持，同时也丰富了生成视频的视觉效果。",{"type":18,"tag":26,"props":159,"children":160},{},[161],{"type":24,"value":162},"在代码实现上，关键的处理包括对于image latent和输入噪声的融合，如下：",{"type":18,"tag":164,"props":165,"children":167},"pre",{"code":166},"def prepare_latents:\n    ...\n    if i2v_mode and i2v_stability:\n        if img_latents.shape[2] == 1:  # img_latents是参考图像的潜在特征\n            img_latents = mint.tile(img_latents, (1, 1, video_length, 1, 1))\n        x0 = randn_tensor(shape, generator=generator, dtype=dtype)  # 高斯随机噪声\n        x1 = img_latents\n        t = 
ms.tensor([0.999])\n        latents = x0 * t + x1 * (1 - t)  # 对参考图像的潜在特征和高斯随机噪声进行混合\n        latents = latents.to(dtype=dtype)\n    if latents is None:\n        latents = randn_tensor(shape, generator=generator, dtype=dtype)\n",[168],{"type":18,"tag":169,"props":170,"children":171},"code",{"__ignoreMap":7},[172],{"type":24,"value":166},{"type":18,"tag":26,"props":174,"children":175},{},[176],{"type":24,"value":177},"可以看出，i2v_stability 为True时，会进行参考图像和随机噪声的潜在混合，以保持对参考图像的一致性。理论上， 当i2v_stability为False时，生成视频会包含更加丰富的动态信息。",{"type":18,"tag":26,"props":179,"children":180},{},[181],{"type":24,"value":182},"其次，在去噪的前后，对首帧图像的潜在特征进行替换：",{"type":18,"tag":164,"props":184,"children":186},{"code":185},"def __call__:\n    ...\n    if i2v_mode and i2v_condition_type == \"token_replace\":\n        latents = mint.concat([img_latents.to(latents.dtype), latents[:, :, 1:, :, :]], dim=2)  # 在去噪前进行首帧图像潜在特征的替换\n    ...\n    noise_pred = self.transformer(xxx)\n    ...\n    if i2v_mode and i2v_condition_type == \"token_replace\":\n        latents = self.scheduler.step(\n            noise_pred[:, :, 1:, :, :], t, latents[:, :, 1:, :, :], **extra_step_kwargs, return_dict=False\n        )[0]\n        latents = mint.concat([img_latents.to(latents.dtype), latents], dim=2)   # 在去噪后进行首帧图像潜在特征的替换\n",[187],{"type":18,"tag":169,"props":188,"children":189},{"__ignoreMap":7},[190],{"type":24,"value":185},{"type":18,"tag":26,"props":192,"children":193},{},[194],{"type":24,"value":195},"在每一步去噪的前后进行首帧图像潜在特征的替换，有利于对参考图像的一致性保持。",{"type":18,"tag":19,"props":197,"children":199},{"id":198},"_03模型介绍",[200],{"type":18,"tag":37,"props":201,"children":202},{},[203],{"type":24,"value":204},"# 
03****模型介绍",{"type":18,"tag":206,"props":207,"children":209},"h2",{"id":208},"_1环境准备",[210],{"type":18,"tag":37,"props":211,"children":212},{},[213],{"type":24,"value":214},"1、环境准备",{"type":18,"tag":26,"props":216,"children":217},{},[218],{"type":18,"tag":81,"props":219,"children":221},{"alt":7,"src":220},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/2ca9044f0a6f46a5862deda20afdc86d.png",[],{"type":18,"tag":223,"props":224,"children":225},"ul",{},[226,242],{"type":18,"tag":227,"props":228,"children":229},"li",{},[230,232,236],{"type":24,"value":231},"CANN下载：",{"type":18,"tag":233,"props":234,"children":235},"br",{},[],{"type":18,"tag":50,"props":237,"children":240},{"href":238,"rel":239},"https://www.hiascend.com/developer/download/community/result?module=cann&cann=8.0.RC3.beta1",[54],[241],{"type":24,"value":238},{"type":18,"tag":227,"props":243,"children":244},{},[245,247,250],{"type":24,"value":246},"MindSpore下载：",{"type":18,"tag":233,"props":248,"children":249},{},[],{"type":18,"tag":50,"props":251,"children":254},{"href":252,"rel":253},"https://www.mindspore.cn/install",[54],[255],{"type":24,"value":252},{"type":18,"tag":206,"props":257,"children":259},{"id":258},"_2安装依赖",[260],{"type":18,"tag":37,"props":261,"children":262},{},[263],{"type":24,"value":264},"2、安装依赖",{"type":18,"tag":164,"props":266,"children":268},{"code":267},"git clone https://github.com/mindspore-lab/mindone\ncd mindone/examples/hunyuanvideo-i2v\npip install -r 
requirements.txt\n",[269],{"type":18,"tag":169,"props":270,"children":271},{"__ignoreMap":7},[272],{"type":24,"value":267},{"type":18,"tag":206,"props":274,"children":276},{"id":275},"_3模型下载",[277],{"type":18,"tag":37,"props":278,"children":279},{},[280],{"type":24,"value":281},"3、模型下载",{"type":18,"tag":26,"props":283,"children":284},{},[285],{"type":18,"tag":81,"props":286,"children":288},{"alt":7,"src":287},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/969db9a564284770aa8dc0c3b8052c06.png",[],{"type":18,"tag":164,"props":290,"children":292},{"code":291},"FILE_PATH=\"/path/to/your/image.jpg\"\npython3 sample_image2video.py \\\n    --prompt \"An Asian man with short hair in black tactical uniform and white clothes waves a firework stick.\" \\\n    --i2v-image-path $FILE_PATH \\\n    --model HYVideo-T/2 \\\n    --i2v-mode \\\n    --i2v-resolution 720p \\\n    --i2v-stability \\\n    --infer-steps 50 \\\n    --video-length 129 \\\n    --flow-reverse \\\n    --flow-shift 7.0 \\\n    --seed 0 \\\n    --embedded-cfg-scale 6.0 \\\n    --save-path ./results \\\n",[293],{"type":18,"tag":169,"props":294,"children":295},{"__ignoreMap":7},[296],{"type":24,"value":291},{"type":18,"tag":26,"props":298,"children":299},{},[300],{"type":24,"value":301},"从 Hugging Face 下载所需的模型，可以参考如下命令：",{"type":18,"tag":164,"props":303,"children":305},{"code":304},"huggingface-cli download tencent/HunyuanVideo-I2V --local-dir 
./ckpts\n",[306],{"type":18,"tag":169,"props":307,"children":308},{"__ignoreMap":7},[309],{"type":24,"value":304},{"type":18,"tag":206,"props":311,"children":313},{"id":312},"_4运行推理",[314],{"type":18,"tag":37,"props":315,"children":316},{},[317],{"type":24,"value":318},"4、运行推理",{"type":18,"tag":26,"props":320,"children":321},{},[322],{"type":24,"value":323},"进行图生视频推理也非常简单，运行下面的命令即可：",{"type":18,"tag":26,"props":325,"children":326},{},[327],{"type":24,"value":328},"如果想要自定义视频生成的内容，只需要修改--prompt，输入你脑海中的创意描述即可。另外，你还可以通过修改--i2v-resolution来控制输出的视频分辨率。目前可选择的分辨率有\"720p\", \"540p\"和\"360p\"。",{"type":18,"tag":26,"props":330,"children":331},{},[332],{"type":24,"value":333},"如果你想要尝试LoRA权重来实现更加丰富的视觉效果，只需要在上述的命令行中增加以下的参数即可：",{"type":18,"tag":164,"props":335,"children":337},{"code":336},"python3 sample_image2video.py \\\n   --prompt  $PROMPT \\\n   --i2v-image-path  $FILE_PATH \\\n   --lora-path ./ckpts/hunyuan-video-i2v-720p/lora/embrace_kohaya_weights.safetensors \\\n   --model HYVideo-T/2 \\\n   --i2v-mode \\\n   --i2v-resolution 720p \\\n   --i2v-stability \\\n   --infer-steps 50 \\\n   --video-length 129 \\\n   --flow-reverse \\\n   --flow-shift 5.0 \\\n   --embedded-cfg-scale 6.0 \\\n   --seed 0 \\\n   --save-path ./results \\\n   --use-lora \\\n   --lora-scale 1.0 \\\n",[338],{"type":18,"tag":169,"props":339,"children":340},{"__ignoreMap":7},[341],{"type":24,"value":336},{"type":18,"tag":19,"props":343,"children":345},{"id":344},"_04性能实测",[346],{"type":18,"tag":37,"props":347,"children":348},{},[349],{"type":24,"value":350},"# 04****性能实测",{"type":18,"tag":26,"props":352,"children":353},{},[354],{"type":24,"value":355},"基于Atlas 800T 
A2和MindSpore 2.5.0的性能测试结果如下：",{"type":18,"tag":26,"props":357,"children":358},{},[359],{"type":18,"tag":81,"props":360,"children":362},{"alt":7,"src":361},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/21/a70dd0d9dc224e73b6d0febfa6187e8e.png",[],{"type":18,"tag":19,"props":364,"children":366},{"id":365},"_05马上体验",[367],{"type":18,"tag":37,"props":368,"children":369},{},[370],{"type":24,"value":371},"# 05****马上体验",{"type":18,"tag":26,"props":373,"children":374},{},[375,377],{"type":24,"value":376},"我们在魔乐社区上完成了HunyuanVideo-I2V的部署！欢迎体验：",{"type":18,"tag":50,"props":378,"children":381},{"href":379,"rel":380},"https://modelers.cn/spaces/MindSpore-lab/hyvideo-i2v",[54],[382],{"type":24,"value":379},{"title":7,"searchDepth":384,"depth":384,"links":385},4,[386,388,389,390],{"id":208,"depth":387,"text":214},2,{"id":258,"depth":387,"text":264},{"id":275,"depth":387,"text":281},{"id":312,"depth":387,"text":318},"markdown","content:technology-blogs:zh:3645.md","content","technology-blogs/zh/3645.md","technology-blogs/zh/3645","md",1776506132730]