[{"data":1,"prerenderedAt":464},["ShallowReactive",2],{"content-query-GjZLlioPYl":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":458,"_id":459,"_source":460,"_file":461,"_stem":462,"_extension":463},"/technology-blogs/zh/2026-1-9","zh",false,"","昇思开放麦 | 从零开始，用昇思MindSpore 打造你的第一个 AI 艺术生成器","从零开始构建一个变分自编码器","2026-1-9","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/06/06/1a18a46ef03442ea8f8d83ba33b0a7af.png","technology-blogs","开发者说",{"type":15,"children":16,"toc":450},"root",[17,25,35,46,51,56,61,66,71,76,81,89,100,105,115,123,154,162,170,174,180,188,195,220,228,236,241,249,256,286,294,302,307,315,322,337,345,353,357,363,371,378,398,406,414,419,439],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"昇思开放麦-从零开始用昇思mindspore-打造你的第一个-ai-艺术生成器",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29],{"type":18,"tag":30,"props":31,"children":32},"strong",{},[33],{"type":24,"value":34},"# 01",{"type":18,"tag":26,"props":36,"children":37},{},[38],{"type":18,"tag":30,"props":39,"children":40},{},[41],{"type":18,"tag":30,"props":42,"children":43},{},[44],{"type":24,"value":45},"背景介绍",{"type":18,"tag":26,"props":47,"children":48},{},[49],{"type":24,"value":50},"今天我们将一起探索如何使用昇思MindSpore 深度学习框架，从零开始构建一个变分自编码器（Variational Autoencoder, VAE），并用它来生成独特的数字艺术作品。",{"type":18,"tag":26,"props":52,"children":53},{},[54],{"type":24,"value":55},"这篇文章不仅会提供完整的代码，还会逐行解释其背后的原理和功能，让你不仅知其然，更知其所以然。",{"type":18,"tag":26,"props":57,"children":58},{},[59],{"type":24,"value":60},"什么是变分自编码器 (VAE)？",{"type":18,"tag":26,"props":62,"children":63},{},[64],{"type":24,"value":65},"在深入代码之前，我们先简单了解一下 VAE。VAE 是一种强大的生成模型，它由两部分组成：",{"type":18,"tag":26,"props":67,"children":68},{},[69],{"type":24,"value":70},"编码器 (Encoder)：它的任务是学习将输入数据（比如一张图片）压缩成一个潜在空间（Latent 
Space）中的分布。这个分布通常用均值（mu）和对数方差（log_var）来描述。",{"type":18,"tag":26,"props":72,"children":73},{},[74],{"type":24,"value":75},"解码器 (Decoder)：它的任务是从这个潜在空间中随机采样一个点，并将其解码（还原）成与原始输入数据相似的输出。",{"type":18,"tag":26,"props":77,"children":78},{},[79],{"type":24,"value":80},"训练完成后，我们就可以直接从潜在空间中随机采样，然后用解码器生成全新的、从未见过的图片。这就是我们实现 “AI 艺术生成” 的核心原理。",{"type":18,"tag":26,"props":82,"children":83},{},[84],{"type":18,"tag":30,"props":85,"children":86},{},[87],{"type":24,"value":88},"# 02",{"type":18,"tag":26,"props":90,"children":91},{},[92],{"type":18,"tag":30,"props":93,"children":94},{},[95],{"type":18,"tag":30,"props":96,"children":97},{},[98],{"type":24,"value":99},"环境设置",{"type":18,"tag":26,"props":101,"children":102},{},[103],{"type":24,"value":104},"首先，我们需要导入所有必要的库，并对 MindSpore 进行一些初始设置。",{"type":18,"tag":106,"props":107,"children":109},"pre",{"code":108},"import mindspore as ms\nimport mindspore.nn as nn\nimport mindspore.ops as ops\nfrom mindspore import Tensor, context\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport os\n\n# 设置CPU环境。如果你有GPU并已配置好，可以改为\"GPU\"以获得更快的训练速度\n# GRAPH_MODE表示使用静态图模式，这是MindSpore默认且高效的执行模式\ncontext.set_context(mode=context.GRAPH_MODE, device_target=\"CPU\")\n# 打印MindSpore版本和项目标题，确认环境配置正确\nprint(f\"MindSpore版本: {ms.__version__}\")\nprint(\"qianduanjidi-AI艺术生成器\")\n# 创建一个名为 \"qianduanjidi\" 的目录，用于存放所有生成的文件（模型、图片等）\n# exist_ok=True 表示如果目录已存在，则不会报错\ndemo_dir = \"qianduanjidi\"\nos.makedirs(demo_dir, exist_ok=True)\n# 设置随机种子。这是一个好习惯，可以确保你的实验结果是可复现的\nms.set_seed(42)\n",[110],{"type":18,"tag":111,"props":112,"children":113},"code",{"__ignoreMap":7},[114],{"type":24,"value":108},{"type":18,"tag":26,"props":116,"children":117},{},[118],{"type":18,"tag":30,"props":119,"children":120},{},[121],{"type":24,"value":122},"代码分析：",{"type":18,"tag":26,"props":124,"children":125},{},[126,128,132,134,137,139,142,144,147,149,152],{"type":24,"value":127},"我们导入了 MindSpore 及其核心模块 nn (神经网络) 和 ops 
(算子)。",{"type":18,"tag":129,"props":130,"children":131},"br",{},[],{"type":24,"value":133},"\nnumpy 用于数值计算，matplotlib.pyplot 用于绘图和可视化。",{"type":18,"tag":129,"props":135,"children":136},{},[],{"type":24,"value":138},"\nos 模块用于与操作系统交互，比如创建目录和读写文件。",{"type":18,"tag":129,"props":140,"children":141},{},[],{"type":24,"value":143},"\ncontext.set_context 是 MindSpore 的核心配置入口，这里我们指定了使用 CPU 和静态图模式。",{"type":18,"tag":129,"props":145,"children":146},{},[],{"type":24,"value":148},"\nos.makedirs 创建了我们的工作目录。",{"type":18,"tag":129,"props":150,"children":151},{},[],{"type":24,"value":153},"\nms.set_seed 固定了随机数生成器的种子，使得每次运行代码时，初始化的权重和随机数据都一样。",{"type":18,"tag":26,"props":155,"children":156},{},[157],{"type":18,"tag":30,"props":158,"children":159},{},[160],{"type":24,"value":161},"# 03",{"type":18,"tag":26,"props":163,"children":164},{},[165],{"type":18,"tag":30,"props":166,"children":167},{},[168],{"type":24,"value":169},"构建 VAE 模型",{"type":18,"tag":171,"props":172,"children":173},"h3",{"id":7},[],{"type":18,"tag":171,"props":175,"children":177},{"id":176},"接下来我们将定义-vae-的核心结构",[178],{"type":24,"value":179},"接下来，我们将定义 VAE 的核心结构。",{"type":18,"tag":106,"props":181,"children":183},{"code":182},"class SimpleVAE(nn.Cell):\n    \"\"\"变分自编码器\"\"\"\n    \n    def __init__(self, image_size=784, h_dim=400, z_dim=20):\n        super(SimpleVAE, self).__init__()\n        \n        # 编码器 (Encoder)\n        # 它是一个简单的神经网络，输入是展平的图片 (784像素)\n        # 输出是潜在空间分布的参数：mu和log_var (各20个维度)\n        self.encoder = nn.SequentialCell(\n            nn.Dense(image_size, h_dim),  # 全连接层：784 -> 400\n            nn.ReLU(),                    # 激活函数，增加非线性\n            nn.Dense(h_dim, z_dim * 2)    # 全连接层：400 -> 40 (20个mu + 20个log_var)\n        )\n       \n        # 解码器 (Decoder)\n        # 它从潜在空间的一个点 (z_dim=20) 开始，尝试还原出原始图片\n        self.decoder = nn.SequentialCell(\n            nn.Dense(z_dim, h_dim),        # 全连接层：20 -> 400\n            nn.ReLU(),                    # 激活函数\n            nn.Dense(h_dim, image_size),  
# 全连接层：400 -> 784\n            nn.Sigmoid()                  # Sigmoid激活，确保输出值在0到1之间（像灰度图的像素值）\n        )\n    \n    def encode(self, x):\n        \"\"\"将输入x编码为mu和log_var\"\"\"\n        h = self.encoder(x)\n        mu, log_var = h[:, :20], h[:, 20:]  # 将输出的40个维度切分成两部分\n        return mu, log_var\n    \n    def reparameterize(self, mu, log_var):\n        \"\"\"\n        重参数化技巧 (Reparameterization Trick)\n        这是VAE的关键。为了让梯度能够顺畅地从解码器流回编码器，\n        我们不直接从 N(mu, sigma^2) 中采样，而是：\n        1. 从标准正态分布 N(0, 1) 中采样一个 eps\n        2. 计算 z = mu + eps * sigma (其中 sigma = exp(log_var / 2))\n        这样，采样过程就变得可微分了。\n        \"\"\"\n        std = ops.Exp()(log_var * 0.5)\n        eps = ops.StandardNormal()(std.shape)\n        return mu + eps * std\n    \n    def decode(self, z):\n        \"\"\"将潜在向量z解码为图片\"\"\"\n        return self.decoder(z)\n   \n    def construct(self, x):\n        \"\"\"\n        MindSpore模型的核心执行逻辑。\n        当你调用模型实例时，这个方法会被执行。\n        \"\"\"\n        mu, log_var = self.encode(x)\n        z = self.reparameterize(mu, log_var)\n        return self.decode(z), mu, log_var\n\n",[184],{"type":18,"tag":111,"props":185,"children":186},{"__ignoreMap":7},[187],{"type":24,"value":182},{"type":18,"tag":26,"props":189,"children":190},{},[191],{"type":18,"tag":30,"props":192,"children":193},{},[194],{"type":24,"value":122},{"type":18,"tag":26,"props":196,"children":197},{},[198,200,203,205,208,210,213,215,218],{"type":24,"value":199},"我们定义了一个继承自 nn.Cell 的 SimpleVAE 类，这是 MindSpore 中所有神经网络模型的基类。",{"type":18,"tag":129,"props":201,"children":202},{},[],{"type":24,"value":204},"\n__init__ 方法中定义了模型的层。encoder 和 decoder 都是 nn.SequentialCell，它可以将多个层按顺序组合起来。",{"type":18,"tag":129,"props":206,"children":207},{},[],{"type":24,"value":209},"\nencode, reparameterize, decode 方法将 VAE 的前向传播过程分解为清晰的步骤。",{"type":18,"tag":129,"props":211,"children":212},{},[],{"type":24,"value":214},"\n重参数化技巧是 VAE 
最重要的创新，它解决了随机采样过程不可微分的问题，使得整个模型可以被端到端地训练。",{"type":18,"tag":129,"props":216,"children":217},{},[],{"type":24,"value":219},"\nconstruct 方法是模型的 “入口”。当我们把数据送入模型时，MindSpore 会自动调用这个方法。它返回了三个值：重建的图片、均值 mu 和对数方差 log_var。",{"type":18,"tag":26,"props":221,"children":222},{},[223],{"type":18,"tag":30,"props":224,"children":225},{},[226],{"type":24,"value":227},"# 04",{"type":18,"tag":26,"props":229,"children":230},{},[231],{"type":18,"tag":30,"props":232,"children":233},{},[234],{"type":24,"value":235},"定义训练逻辑",{"type":18,"tag":26,"props":237,"children":238},{},[239],{"type":24,"value":240},"MindSpore 中，我们通常将训练逻辑封装在另一个 Cell 中，称为 “训练器”。",{"type":18,"tag":106,"props":242,"children":244},{"code":243},"class VAETrainer(nn.Cell):\n    \"\"\"\n    训练器Cell，负责定义单步训练的逻辑。\n    \"\"\"\n    \n    def __init__(self, network, optimizer):\n        super(VAETrainer, self).__init__(auto_prefix=False)\n        self.network = network  # 我们的VAE模型\n        self.optimizer = optimizer  # 优化器，如Adam\n        self.weights = self.optimizer.parameters  # 需要被优化的模型参数\n        self.grad = ops.GradOperation(get_by_list=True)  # 用于计算梯度的算子\n    \n    def construct(self, x):\n        # 1. 前向传播：将输入x送入VAE模型\n        reconstructed, mu, log_var = self.network(x)\n        \n        # 2. 计算损失函数 (Loss Function)\n        # VAE的损失由两部分组成：重建损失和KL散度损失\n        # a. 重建损失 (Reconstruction Loss)：衡量重建的图片与原始图片的相似度\n        recon_loss = ops.reduce_mean((reconstructed - x) ** 2) # MSE损失\n        \n        # b. KL散度损失 (KL Divergence Loss)：衡量编码器输出的分布与标准正态分布的差异\n        # 这部分损失起到了正则化的作用，防止模型过拟合，并鼓励潜在空间具有良好的结构\n        kl_loss = -0.5 * ops.reduce_mean(1 + log_var - mu**2 - ops.exp(log_var))\n       \n        # 总损失 = 重建损失 + KL损失 (通常会给KL损失一个较小的权重)\n        total_loss = recon_loss + 0.001 * kl_loss\n        \n        # 3. 反向传播：计算梯度\n        grads = self.grad(self.network, self.weights)(x)\n        \n        # 4. 
更新参数：使用优化器根据梯度更新模型权重\n        self.optimizer(grads)\n        \n        # 返回单步的总损失\n        return total_loss\n",[245],{"type":18,"tag":111,"props":246,"children":247},{"__ignoreMap":7},[248],{"type":24,"value":243},{"type":18,"tag":26,"props":250,"children":251},{},[252],{"type":18,"tag":30,"props":253,"children":254},{},[255],{"type":24,"value":122},{"type":18,"tag":26,"props":257,"children":258},{},[259,261,264,266,269,271,274,276,279,281,284],{"type":24,"value":260},"VAETrainer 同样继承自 nn.Cell。它接收一个模型 (network) 和一个优化器 (optimizer) 作为输入。",{"type":18,"tag":129,"props":262,"children":263},{},[],{"type":24,"value":265},"\nconstruct 方法定义了单批次数据的训练流程：",{"type":18,"tag":129,"props":267,"children":268},{},[],{"type":24,"value":270},"\n前向传播：得到模型输出。",{"type":18,"tag":129,"props":272,"children":273},{},[],{"type":24,"value":275},"\n计算损失：VAE 的损失是重建损失与 KL 散度损失的加权和（本例中 KL 项的权重为 0.001）。",{"type":18,"tag":129,"props":277,"children":278},{},[],{"type":24,"value":280},"\n反向传播：ops.GradOperation 是一个非常强大的工具，它会自动计算 self.network 在输入 x 下的梯度。",{"type":18,"tag":129,"props":282,"children":283},{},[],{"type":24,"value":285},"\n参数更新：调用 self.optimizer(grads) 来更新模型的权重。",{"type":18,"tag":26,"props":287,"children":288},{},[289],{"type":18,"tag":30,"props":290,"children":291},{},[292],{"type":24,"value":293},"# 05",{"type":18,"tag":26,"props":295,"children":296},{},[297],{"type":18,"tag":30,"props":298,"children":299},{},[300],{"type":24,"value":301},"创建合成数据集",{"type":18,"tag":26,"props":303,"children":304},{},[305],{"type":24,"value":306},"为了简化演示，我们不使用真实的 MNIST 数据集，而是生成一些简单的合成数据 —— 随机的圆形。",{"type":18,"tag":106,"props":308,"children":310},{"code":309},"def create_simple_dataset(num_samples=600):\n    \"\"\"创建一个由简单圆形组成的合成数据集\"\"\"\n    print(\"创建合成数据集...\")\n    images = []\n    \n    for i in range(num_samples):\n        # 创建一个28x28的全零矩阵，代表一张黑色图片\n        img = np.zeros((28, 28), dtype=np.float32)\n        # 随机选择圆形的中心点\n        center_x, center_y = np.random.randint(8, 20), np.random.randint(8, 20)\n        \n        # 
在图片上绘制一个圆形\n        for x in range(28):\n            for y in range(28):\n                # 计算点 (x,y) 到圆心的距离\n                dist = np.sqrt((x - center_x)**2 + (y - center_y)**2)\n                # 如果距离小于6，则该点为白色（值为0.8到1.0之间的随机数）\n                if dist \u003C 6:\n                    img[x, y] = 0.8 + np.random.rand() * 0.2\n       \n        # 将28x28的矩阵展平成一个784维的向量，并添加到列表中\n        images.append(img.flatten())\n    \n    # 将列表转换为NumPy数组并返回\n    return np.array(images)\n",[311],{"type":18,"tag":111,"props":312,"children":313},{"__ignoreMap":7},[314],{"type":24,"value":309},{"type":18,"tag":26,"props":316,"children":317},{},[318],{"type":18,"tag":30,"props":319,"children":320},{},[321],{"type":24,"value":122},{"type":18,"tag":26,"props":323,"children":324},{},[325,327,330,332,335],{"type":24,"value":326},"这个函数非常直观，它通过双重循环在一个 28x28 的网格上绘制圆形。",{"type":18,"tag":129,"props":328,"children":329},{},[],{"type":24,"value":331},"\n每个样本都是一个代表 “数字 0” 的抽象圆形，位置和亮度略有不同。",{"type":18,"tag":129,"props":333,"children":334},{},[],{"type":24,"value":336},"\n最后，img.flatten() 将二维图像变成一维向量，这是我们之前定义的 Dense 层所期望的输入格式。",{"type":18,"tag":26,"props":338,"children":339},{},[340],{"type":18,"tag":30,"props":341,"children":342},{},[343],{"type":24,"value":344},"# 06",{"type":18,"tag":26,"props":346,"children":347},{},[348],{"type":18,"tag":30,"props":349,"children":350},{},[351],{"type":24,"value":352},"主训练与生成流程",{"type":18,"tag":171,"props":354,"children":356},{"id":355},"_1",[],{"type":18,"tag":171,"props":358,"children":360},{"id":359},"现在我们将所有部分组装起来编写主训练循环和后续的生成评估代码",[361],{"type":24,"value":362},"现在，我们将所有部分组装起来，编写主训练循环和后续的生成、评估代码。",{"type":18,"tag":106,"props":364,"children":366},{"code":365},"def train_simple_vae():\n    \"\"\"训练VAE模型的主函数\"\"\"\n    print(\"开始训练VAE...\")\n    \n    # 1. 实例化模型和优化器\n    vae = SimpleVAE()\n    optimizer = nn.Adam(vae.trainable_params(), learning_rate=1e-3)\n    \n    # 2. 实例化训练器\n    trainer = VAETrainer(vae, optimizer)\n    \n    # 3. 
获取数据\n    images = create_simple_dataset()\n    \n    # 4. 训练循环\n    epochs = 20  # 训练轮数\n    losses = []  # 用于记录每一轮的平均损失\n    \n    for epoch in range(epochs):\n        total_loss = 0\n        batch_size = 64\n        batch_count = 0\n        \n        # 遍历数据集，按批次进行训练\n        for i in range(0, len(images), batch_size):\n            # 提取一个批次的数据\n            batch_data = images[i:min(i+batch_size, len(images))]\n            # 将NumPy数组转换为MindSpore的Tensor\n            batch_tensor = Tensor(batch_data, ms.float32)\n            \n            # 执行单步训练，并获取损失\n            loss = trainer(batch_tensor)\n            \n            # 累积损失\n            total_loss += loss.asnumpy()\n            batch_count += 1\n        \n        # 计算并记录本轮的平均损失\n        avg_loss = total_loss / batch_count\n        losses.append(avg_loss)\n        \n        # 每5轮打印一次损失信息\n        if (epoch + 1) % 5 == 0:\n            print(f\"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}\")\n    \n    print(\"训练完成!\")\n    # 返回训练好的模型和损失历史\n    return vae, losses\n\n# ... (此处省略 save_model_simple, generate_and_save_images, plot_training_loss, \n#      demo_model_loading, list_demo_files 等辅助函数的定义，它们的代码在文末完整版本中) ...\ndef main():\n    \"\"\"程序的主入口\"\"\"\n    print(\"=\" * 50)\n    print(\"MindSpore AI艺术生成器\")\n    print(\"=\" * 50)\n   \n    # 1. 训练模型\n    vae, losses = train_simple_vae()\n   \n    # 2. 保存训练好的模型\n    save_model_simple(vae, 'vae_model.ckpt')\n   \n    # 3. 使用训练好的模型生成艺术图像\n    generate_and_save_images(vae)\n    \n    # 4. 绘制并保存训练损失曲线\n    plot_training_loss(losses)\n    \n    # 5. 演示如何加载已保存的模型并进行生成\n    demo_model_loading()\n    \n    # 6. 
列出所有生成的文件\n    list_demo_files()\n    \n    print(\"\\n🎉 所有任务完成!\")\n    print(f\"📁 请查看 '{demo_dir}' 文件夹中的结果文件\")\n# 当直接运行此脚本时，调用main函数\nif __name__ == \"__main__\":\n    main()\n",[367],{"type":18,"tag":111,"props":368,"children":369},{"__ignoreMap":7},[370],{"type":24,"value":365},{"type":18,"tag":26,"props":372,"children":373},{},[374],{"type":18,"tag":30,"props":375,"children":376},{},[377],{"type":24,"value":122},{"type":18,"tag":26,"props":379,"children":380},{},[381,383,386,388,391,393,396],{"type":24,"value":382},"train_simple_vae 函数组织了完整的训练流程：实例化模型、准备数据、然后在一个 for 循环中迭代训练。",{"type":18,"tag":129,"props":384,"children":385},{},[],{"type":24,"value":387},"\n在每一轮（epoch）中，我们又会按批次（batch）处理数据。这是深度学习训练的标准模式。",{"type":18,"tag":129,"props":389,"children":390},{},[],{"type":24,"value":392},"\ntrainer(batch_tensor) 这一行是关键，它触发了我们在 VAETrainer 的 construct 方法中定义的所有计算： 前向传播、损失计算、反向传播和参数更新。",{"type":18,"tag":129,"props":394,"children":395},{},[],{"type":24,"value":397},"\nmain 函数是整个程序的调度中心，它按顺序调用了训练、保存、生成、可视化等所有功能。",{"type":18,"tag":26,"props":399,"children":400},{},[401],{"type":18,"tag":30,"props":402,"children":403},{},[404],{"type":24,"value":405},"# 07",{"type":18,"tag":26,"props":407,"children":408},{},[409],{"type":18,"tag":30,"props":410,"children":411},{},[412],{"type":24,"value":413},"运行结果",{"type":18,"tag":26,"props":415,"children":416},{},[417],{"type":24,"value":418},"程序运行结束后，会在当前目录下创建一个名为 qianduanjidi 的文件夹，里面包含了：",{"type":18,"tag":26,"props":420,"children":421},{},[422,424,427,429,432,434,437],{"type":24,"value":423},"vae_model.ckpt: 保存的模型权重文件。",{"type":18,"tag":129,"props":425,"children":426},{},[],{"type":24,"value":428},"\ngenerated_art.png: 由训练好的模型生成的 16 幅艺术图像。",{"type":18,"tag":129,"props":430,"children":431},{},[],{"type":24,"value":433},"\ntraining_loss.png: 训练过程中的损失变化曲线。",{"type":18,"tag":129,"props":435,"children":436},{},[],{"type":24,"value":438},"\ngenerated_art_from_loaded_model.png: 
加载模型后生成的另一组图像，用于验证模型加载功能。",{"type":18,"tag":440,"props":441,"children":443},"div",{"style":442},"text-align: center;",[444],{"type":18,"tag":445,"props":446,"children":449},"img",{"src":447,"style":448,"alt":7},"/category/information/technology-blogs/banner/2026-1-9/1.jpg","display: block;margin: 0 auto;max-width:70%",[],{"title":7,"searchDepth":451,"depth":451,"links":452},4,[453,455,456,457],{"id":7,"depth":454,"text":7},3,{"id":176,"depth":454,"text":179},{"id":355,"depth":454,"text":7},{"id":359,"depth":454,"text":362},"markdown","content:technology-blogs:zh:2026-1-9.md","content","technology-blogs/zh/2026-1-9.md","technology-blogs/zh/2026-1-9","md",1776506119309]