云服务器内容精选

  • 附录:config.json文件 config.json文件在推理服务启动时使用,需要修改以下参数,2台机器的每个容器中config.json文件内容一致。 ipAddress:主节点IP地址,即rank_table_file.json文件中的server_id。 managementIpAddress:主节点IP地址,和ipAddress取值一致。 httpsEnabled:取值需要修改为false。 interCommTLSEnabled和interNodeTLSEnabled:如果不需要开启安全认证,这2个参数取值需要修改为false。 multiNodesInferEnabled:取值需要修改为true,表示开启多机推理。 modelName:设置为DeepSeek-V3或DeepSeek-R1。 modelWeightPath:权重文件在容器内的地址,例如:${container_work_dir}/deepseekV3-w8a8或${container_work_dir}/deepseekR1-w8a8目录。${container_work_dir}在步骤四:启动容器时定义。 maxPrefillBatchSize:最大prefill batch size。config.json文件中默认是50,并发请求数量超出设置,推理请求会被拒绝。用户可以根据实际情况修改。maxPrefillBatchSize和maxPrefillTokens谁先达到各自的取值就完成本次组batch。 maxSeqLen:输入长度+输出长度的最大值。该值为maxInputTokenLen+maxIterTimes的和。config.json文件中默认是16k,用户可以根据自己的推理场景设置。 maxInputTokenLen:输入最大长度。config.json文件中默认是15k,用户可以根据自己的推理场景设置。 maxPrefillTokens:最大prefill token数。和maxInputTokenLen保持相同。 maxIterTimes:最大输出长度。config.json文件中默认是1k,用户可以根据自己的推理场景设置。 当前在W8A8量化权重、2台Ascend Snt9B资源下支持的maxSeqLen最大为32768。 { "Version" : "1.0.0", "LogConfig" : { "logLevel" : "Info", "logFileSize" : 20, "logFileNum" : 20, "logPath" : "logs/mindie-server.log" }, "ServerConfig" : { "ipAddress" : "7.242.110.112", "managementIpAddress" : "7.242.110.112", "port" : 1025, "managementPort" : 1026, "metricsPort" : 1027, "allowAllZeroIpListening" : false, "maxLinkNum" : 1000, "httpsEnabled" : false, "fullTextEnabled" : false, "tlsCaPath" : "security/ca/", "tlsCaFile" : ["ca.pem"], "tlsCert" : "security/certs/server.pem", "tlsPk" : "security/keys/server.key.pem", "tlsPkPwd" : "security/pass/key_pwd.txt", "tlsCrlPath" : "security/certs/", "tlsCrlFiles" : ["server_crl.pem"], "managementTlsCaFile" : ["management_ca.pem"], "managementTlsCert" : "security/certs/management/server.pem", "managementTlsPk" : "security/keys/management/server.key.pem", "managementTlsPkPwd" : "security/pass/management/key_pwd.txt", "managementTlsCrlPath" : "security/management/certs/", "managementTlsCrlFiles" : ["server_crl.pem"], "kmcKsfMaster" : 
"tools/pmt/master/ksfa", "kmcKsfStandby" : "tools/pmt/standby/ksfb", "inferMode" : "standard", "interCommTLSEnabled" : false, "interCommPort" : 1121, "interCommTlsCaPath" : "security/grpc/ca/", "interCommTlsCaFiles" : ["ca.pem"], "interCommTlsCert" : "security/grpc/certs/server.pem", "interCommPk" : "security/grpc/keys/server.key.pem", "interCommPkPwd" : "security/grpc/pass/key_pwd.txt", "interCommTlsCrlPath" : "security/grpc/certs/", "interCommTlsCrlFiles" : ["server_crl.pem"], "openAiSupport" : "vllm" }, "BackendConfig" : { "backendName" : "mindieservice_llm_engine", "modelInstanceNumber" : 1, "npuDeviceIds" : [[0,1,2,3]], "tokenizerProcessNumber" : 8, "multiNodesInferEnabled" : true, "multiNodesInferPort" : 1120, "interNodeTLSEnabled" : false, "interNodeTlsCaPath" : "security/grpc/ca/", "interNodeTlsCaFiles" : ["ca.pem"], "interNodeTlsCert" : "security/grpc/certs/server.pem", "interNodeTlsPk" : "security/grpc/keys/server.key.pem", "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt", "interNodeTlsCrlPath" : "security/grpc/certs/", "interNodeTlsCrlFiles" : ["server_crl.pem"], "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa", "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb", "ModelDeployConfig" : { "maxSeqLen" : 16384, "maxInputTokenLen" : 15360, "truncation" : false, "ModelConfig" : [ { "modelInstanceType" : "Standard", "modelName" : "DeepSeek-V3", "modelWeightPath" : "/data/model/DeepSeek-V3-w8a8", "worldSize" : 4, "cpuMemSize" : 5, "npuMemSize" : -1, "backendType" : "atb", "trustRemoteCode" : false } ] }, "ScheduleConfig" : { "templateType" : "Standard", "templateName" : "Standard_LLM", "cacheBlockSize" : 128, "maxPrefillBatchSize" : 50, "maxPrefillTokens" : 15360, "prefillTimeMsPerReq" : 150, "prefillPolicyType" : 0, "decodeTimeMsPerReq" : 50, "decodePolicyType" : 0, "maxBatchSize" : 200, "maxIterTimes" : 1024, "maxPreemptCount" : 0, "supportSelectBatch" : false, "maxQueueDelayMicroseconds" : 5000 } } } 父主题: DeepSeek模型基于ModelArts Lite 
Server适配MindIE推理部署指导
  • 附录:rank_table_file.json文件 rank_table_file.json文件样例如下,需要根据实际情况修改server_count,device_ip,server_id,container_ip参数,每台机器上的rank_table_file.json文件内容一致。在步骤三:创建rank_table_file.json步骤中会用到。 server_count:节点个数。当前默认为2。 device_ip:当前卡的IP地址,2台机器共16张卡。device_ip查询命令:for i in {0..7};do hccn_tool -i $i -ip -g; done server_id:当前Server节点的IP地址,涉及2台机器。 container_ip:容器IP地址,无特殊配置时与server_id保持一致。 { "server_count": "2", "server_list": [ { "device": [ { "device_id": "0", "device_ip": "29.82.85.12", "rank_id": "0" }, { "device_id": "1", "device_ip": "29.82.98.67", "rank_id": "1" }, { "device_id": "2", "device_ip": "29.82.133.21", "rank_id": "2" }, { "device_id": "3", "device_ip": "29.82.175.69", "rank_id": "3" }, { "device_id": "4", "device_ip": "29.82.13.154", "rank_id": "4" }, { "device_id": "5", "device_ip": "29.82.140.51", "rank_id": "5" }, { "device_id": "6", "device_ip": "29.82.157.87", "rank_id": "6" }, { "device_id": "7", "device_ip": "29.82.15.225", "rank_id": "7" } ], "server_id": "7.242.110.112", "container_ip": "7.242.110.112" }, { "device": [ { "device_id": "0", "device_ip": "29.82.177.28", "rank_id": "8" }, { "device_id": "1", "device_ip": "29.82.41.231", "rank_id": "9" }, { "device_id": "2", "device_ip": "29.82.16.3", "rank_id": "10" }, { "device_id": "3", "device_ip": "29.82.154.20", "rank_id": "11" }, { "device_id": "4", "device_ip": "29.82.56.73", "rank_id": "12" }, { "device_id": "5", "device_ip": "29.82.177.138", "rank_id": "13" }, { "device_id": "6", "device_ip": "29.82.29.230", "rank_id": "14" }, { "device_id": "7", "device_ip": "29.82.1.176", "rank_id": "15" } ], "server_id": "7.242.104.54", "container_ip": "7.242.104.54" } ], "status": "completed", "version": "1.0" } 父主题: DeepSeek模型基于ModelArts Lite Server适配MindIE推理部署指导
  • 部署流程 表1 部署流程 部署任务 子任务 AR开局部署 开启SD-WAN服务 创建站点并添加设备 配置网络设计参数 配置WAN链路模板 配置站点接入WAN侧网络的链路 配置NTP 配置站点连接RR 邮件开局 确认开局成功 站点互联配置 配置Underlay网络 创建Overlay网络 确认配置是否成功 配置站点上网策略 应用管理 查看预定义应用 创建自定义应用 创建应用组 使用应用及应用组 业务体验优化策略 配置流量分配 配置QoS策略 配置智能选路策略 查看全网数据 查看全网数据 父主题: 部署指导
  • 部署流程 表1 部署流程 部署任务 子任务 AR开局部署 开启SD-WAN服务 创建站点并添加设备 配置网络设计参数 配置WAN链路模板 配置站点接入WAN侧网络的链路 配置NTP 配置站点连接RR 邮件开局 确认开局成功 站点互联配置 配置Underlay网络 创建Overlay网络 确认配置是否成功 配置站点上网策略 应用管理 查看预定义应用 创建自定义应用 创建应用组 使用应用及应用组 业务体验优化策略 配置流量分配 配置Overlay ACL策略 配置QoS策略 配置智能选路策略 查看全网数据 查看全网数据 父主题: 部署指导