-
Notifications
You must be signed in to change notification settings - Fork 5.9k
Open
Labels
Description
bug描述 Describe the Bug
paddle.zeros对于paddle.CUDAPinnedPlace()不生效
paddle.to_tensor指定dtype后paddle.CUDAPinnedPlace()不生效
import os
import time
import psutil
import numpy as np
import paddle
from pynvml import (
nvmlInit,
nvmlShutdown,
nvmlDeviceGetHandleByIndex,
nvmlDeviceGetMemoryInfo,
)
GPU_ID = 0 # GPU index to monitor — NOTE(review): original comment said "physical card #2" but index 0 is used; confirm which card is intended
# Initialize NVML once at module load so per-GPU memory can be queried below;
# paired with nvmlShutdown() at the end of the script.
nvmlInit()
handle = nvmlDeviceGetHandleByIndex(GPU_ID)
def cpu_mem_gb():
    """Return the host's currently used RAM in GiB (via psutil)."""
    used_bytes = psutil.virtual_memory().used
    return used_bytes / (1024 ** 3)
def gpu_mem_gb():
    """Return used VRAM in GiB of the GPU tracked by module-level `handle`."""
    mem = nvmlDeviceGetMemoryInfo(handle)
    return mem.used / (1024 ** 3)
# Snapshot host RAM and GPU VRAM before any Paddle allocation so the
# "After" prints below show the effect of the pinned-memory allocations.
print(f"Before CPU RAM: {cpu_mem_gb():.2f} GB")
print(f"Before GPU{GPU_ID} VRAM: {gpu_mem_gb():.2f} GB")
# Pin Paddle to the single GPU under test so no CUDA context is created on
# other cards (which would skew the NVML VRAM reading).
paddle.device.set_device(f"gpu:{GPU_ID}")
# Allocate ordinary (pageable) host memory: 1024*1024*256*5 float32 ≈ 5 GiB.
arr = np.empty([1024, 1024, 256, 5], dtype="float32")
# Copy into CUDA pinned (page-locked) host memory.  The three lines below are
# the bug repro and are kept exactly as reported: per the issue, the pinned
# place is ignored when dtype is also passed to to_tensor, and paddle.zeros
# does not honor CUDAPinnedPlace at all — do not "fix" these calls.
x = paddle.to_tensor(arr, place=paddle.CUDAPinnedPlace())
x_bf16 = paddle.to_tensor(arr, place=paddle.CUDAPinnedPlace(), dtype=paddle.bfloat16)
cpu_data = paddle.zeros(arr.shape, device=paddle.CUDAPinnedPlace())
# Fixed: originals used print(f"...:", value) — an f-prefix with no
# placeholder plus a second positional arg; real f-strings emit the same text.
print(f"x.place: {x.place}")
print(f"x_bf16.place: {x_bf16.place}")
print(f"cpu_data.place: {cpu_data.place}")
print(f"After CPU RAM: {cpu_mem_gb():.2f} GB")
print(f"After GPU{GPU_ID} VRAM: {gpu_mem_gb():.2f} GB")
# print("Sleeping 60s... You can also run: nvidia-smi -i 2 -l 1")
# time.sleep(60)
nvmlShutdown()

其他补充信息 Additional Supplementary Information
No response