弹性内存存储 EMS-同步加载KVCache:代码样例
时间:2025-06-19 16:52:48
代码样例
下面的示例演示了同步加载KVCache的流程,并对调用过程中抛出的异常进行处理。
# Example: synchronously load KVCache entries through the EMS context-caching
# (cc) client and handle the exceptions raised along the way.
#
# Exit codes: 1 = EMS initialisation / client lookup failed, 2 = load failed.
import os
import sys

import torch
import torch_npu  # presumably imported for its side effect of enabling "npu" devices

from ems import CcConfig, CcKvOption, Ems, EmsConfig, EmsException, KvBufferWrapper

# Initialise the cc configuration.
# NOTE(review): device_id is 0 here while the tensors below live on "npu:1" —
# confirm whether these are expected to match.
cc_config = CcConfig(rank_id=8, device_id=0, model_id="llama2-13b")

# Initialise Ems.
config = EmsConfig(cc_config=cc_config)
try:
    Ems.init(config)
except EmsException as e:
    print(f"exception: {e}.")
    sys.exit(1)

# Obtain the context caching object.
cc = Ems.get_cc()
if cc is None:
    print("cc is None.")
    sys.exit(1)

# Set the timeout of the load request (unit presumably milliseconds — confirm
# against the CcKvOption reference).
option = CcKvOption(timeout=5000)

# Build the key list and the matching value-buffer list.
key_list = ["123", "66"]

# Only tensors that live on an NPU device are supported.
tensor1 = torch.ones(2, device="npu:1")
tensor2 = torch.ones(6, device="npu:1")

# Buffer length in bytes = element count * bytes per element.
len1 = tensor1.numel() * tensor1.element_size()
len2 = tensor2.numel() * tensor2.element_size()

# data_ptr is a method: it must be *called* to obtain the integer address of
# the tensor's first element; passing the bound method itself is a bug.
val_list = [
    [KvBufferWrapper(tensor1.data_ptr(), len1)],
    [KvBufferWrapper(tensor2.data_ptr(), len2)],
]

# Different exceptions can be handled differently; for example, a timeout
# error may be retried.
try:
    cc_result = cc.load(option, key_list, val_list)
except EmsException as e:
    print(f"failed to load, {e}.")
    sys.exit(2)

print(f"succeed to load key num {cc_result.success}.")
print(cc_result)
support.huaweicloud.com/sdk-python-devg-ems/obs_22_1804.html