弹性内存存储 EMS-同步加载KVCache:代码样例

时间:2025-06-19 16:52:48

代码样例

下面示例描述了同步加载KVCache流程,同时对返回异常进行处理。

import os, torch, torch_npu
from ems import Ems, EmsConfig, EmsException, CcConfig, CcKvOption, KvBufferWrapper
# Describe the context-caching (CC) side of the configuration.
cc_config = CcConfig(
    rank_id=8,
    device_id=0,
    model_id="llama2-13b",
)
# Wrap the CC settings in the top-level EMS configuration.
config = EmsConfig(cc_config=cc_config)
# Initialise EMS; an initialisation failure is reported and the script
# stops with exit status 1.
try:
    Ems.init(config)
except EmsException as e:
    print(f"exception: {e}.")
    exit(1)
else:
    # Reached only when initialisation succeeded: obtain the
    # context-caching object used for the KVCache operations below.
    cc = Ems.get_cc()
# Without a context-caching object nothing further can be done.
if cc is None:
    print("cc is None.")
    exit(1)
# Timeout applied to the load request.
# NOTE(review): the unit is not visible here (presumably milliseconds) —
# confirm against the CcKvOption reference.
option = CcKvOption(timeout=5000)
# Keys to load; val_list below holds one list of buffers per key.
key_list = ["123", "66"]
# Only tensors that live on an NPU device are supported as buffers.
# NOTE(review): the tensors are placed on "npu:1" while the CcConfig used at
# initialisation specifies device_id=0 — confirm the device ids are meant to
# differ.
tensor1 = torch.ones(2, device="npu:1")
tensor2 = torch.ones(6, device="npu:1")
# Buffer size in bytes = element count * bytes per element.
len1 = tensor1.numel() * tensor1.element_size()
len2 = tensor2.numel() * tensor2.element_size()
# data_ptr is a method: it must be *called* to obtain the integer address of
# the tensor's storage. Passing the bound method itself would hand the
# wrapper a Python callable instead of a pointer.
val_list = [
    [KvBufferWrapper(tensor1.data_ptr(), len1)],
    [KvBufferWrapper(tensor2.data_ptr(), len2)],
]
# Different exceptions can be handled differently; for example, a timeout
# error could be retried.
try:
    cc_result = cc.load(option, key_list, val_list)
except EmsException as e:
    print(f"failed to load, {e}.")
    exit(2)
print(f"succeed to load key num {cc_result.success}.")
print(cc_result)
support.huaweicloud.com/sdk-python-devg-ems/obs_22_1804.html