Skip to content

Commit bdf0c28

Browse files
authored
Merge pull request #5449 from ye-luo/pinned-mem
Pin host memory for fast/stable H2D and D2H transfers in non-batched delayed update implementation
2 parents 949e1cb + 8870373 commit bdf0c28

File tree

7 files changed

+43
-0
lines changed

7 files changed

+43
-0
lines changed

src/Containers/OhmmsPETE/OhmmsMatrix.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ class Matrix
132132
X.attachReference(other, ref, n * m);
133133
}
134134

135+
/// return true if this container is attached to another container
136+
inline bool isAttached() const
{
  // forward the query to the underlying storage X
  return X.isAttached();
}
137+
135138
template<typename Allocator = Alloc, typename = IsHostSafe<Allocator>>
136139
inline void add(size_type n) // you can add rows: adding columns are forbidden
137140
{

src/Containers/OhmmsPETE/OhmmsVector.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,9 @@ class Vector
151151
qmc_allocator_traits<Alloc>::attachReference(other.mAllocator, mAllocator, ref - other.data());
152152
}
153153

154+
/// return true if this container is attached to another container
155+
inline bool isAttached() const
{
  // true when elements are visible (nLocal > 0) while nAllocated is zero
  return nLocal > 0 && nAllocated == 0;
}
156+
154157
//! return the current size
155158
inline size_type size() const { return nLocal; }
156159

src/Containers/OhmmsSoA/VectorSoaContainer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,9 @@ struct VectorSoaContainer
200200
qmc_allocator_traits<Alloc>::attachReference(other.mAllocator, mAllocator, other.data(), ptr);
201201
}
202202

203+
/// return true if this container is attached to another container
204+
inline bool isAttached() { return nLocal > 0 && nAllocated == 0; }
205+
203206
///return the physical size
204207
inline size_type size() const { return nLocal; }
205208
///return the physical size

src/QMCWaveFunctions/Fermion/DelayedUpdate.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ class DelayedUpdate
6262
delay_list.resize(delay);
6363
}
6464

65+
/// intentionally empty in this engine; DelayedUpdateCUDA::prepareForDeviceCopy is the variant that registers host memory
inline void prepareForDeviceCopy(const Matrix<T>& mat) const
{}
66+
67+
/// intentionally empty in this engine; DelayedUpdateCUDA::releaseFromDeviceCopy is the variant that unregisters host memory
inline void releaseFromDeviceCopy(const Matrix<T>& mat) const
{}
68+
6569
/** initialize internal objects when Ainv is refreshed
6670
* @param Ainv inverse matrix
6771
*/

src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,18 @@ class DelayedUpdateCUDA
8585
Ainv_gpu.resize(norb, norb);
8686
}
8787

88+
/** register the host storage of a matrix via compute::MemManage<PL>::registerHost
 * so that it is pinned for H2D/D2H transfers. Nothing is done when the matrix is empty.
 * @param mat matrix whose mat.size() * sizeof(T) bytes starting at mat.data() are registered
 */
inline void prepareForDeviceCopy(Matrix<T>& mat) const
89+
{
90+
if(mat.size())
91+
compute::MemManage<PL>::registerHost(mat.data(), mat.size() * sizeof(T));
92+
}
93+
94+
/** unregister the host storage of a matrix via compute::MemManage<PL>::unregisterHost.
 * Nothing is done when the matrix is empty.
 * NOTE(review): presumably mat.data() must be the pointer previously passed to prepareForDeviceCopy — confirm.
 * @param mat matrix whose storage at mat.data() is unregistered
 */
inline void releaseFromDeviceCopy(Matrix<T>& mat) const
95+
{
96+
if(mat.size())
97+
compute::MemManage<PL>::unregisterHost(mat.data());
98+
}
99+
88100
/** compute the inverse of the transpose of matrix A and its determinant value in log
89101
* @tparam TREAL real type
90102
*/

src/QMCWaveFunctions/Fermion/DiracDeterminant.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ DiracDeterminant<PL, VT, FPVT>::DiracDeterminant(std::unique_ptr<SPOSet>&& spos,
4747
rot_spo->buildOptVariables(NumPtcls);
4848
}
4949

50+
/** destructor: release psiM and psiM_temp from device copy through the update engine
 *
 * The unbraced if governs only the psiM release: psiM is released only when it is not attached
 * to another container, whereas psiM_temp is always released.
 * NOTE(review): presumably an attached psiM was already released in registerData before psiM.free() — confirm.
 */
template<PlatformKind PL, typename VT, typename FPVT>
51+
DiracDeterminant<PL, VT, FPVT>::~DiracDeterminant()
52+
{
53+
if(!psiM.isAttached())
54+
accel_engine_.update_eng_.releaseFromDeviceCopy(psiM);
55+
accel_engine_.update_eng_.releaseFromDeviceCopy(psiM_temp);
56+
}
57+
5058
template<PlatformKind PL, typename VT, typename FPVT>
5159
void DiracDeterminant<PL, VT, FPVT>::invertPsiM(const ValueMatrix& logdetT, ValueMatrix& invMat)
5260
{
@@ -115,6 +123,10 @@ void DiracDeterminant<PL, VT, FPVT>::resize(int nel, int morb)
115123
int norb = morb;
116124
if (norb <= 0)
117125
norb = nel; // for morb == -1 (default)
126+
127+
accel_engine_.update_eng_.releaseFromDeviceCopy(psiM);
128+
accel_engine_.update_eng_.releaseFromDeviceCopy(psiM_temp);
129+
118130
accel_engine_.update_eng_.resize(norb, ndelay_);
119131
psiM.resize(nel, norb);
120132
dpsiM.resize(nel, norb);
@@ -123,6 +135,9 @@ void DiracDeterminant<PL, VT, FPVT>::resize(int nel, int morb)
123135
invRow.resize(norb);
124136
psiM_temp.resize(nel, norb);
125137

138+
accel_engine_.update_eng_.prepareForDeviceCopy(psiM);
139+
accel_engine_.update_eng_.prepareForDeviceCopy(psiM_temp);
140+
126141
dpsiV.resize(NumOrbitals);
127142
dspin_psiV.resize(NumOrbitals);
128143
d2psiV.resize(NumOrbitals);
@@ -345,6 +360,7 @@ void DiracDeterminant<PL, VT, FPVT>::registerData(ParticleSet& P, WFBufferType&
345360
buf.add(d2psiM.first_address(), d2psiM.last_address());
346361
Bytes_in_WFBuffer = buf.current() - Bytes_in_WFBuffer;
347362
// free local space
363+
accel_engine_.update_eng_.releaseFromDeviceCopy(psiM);
348364
psiM.free();
349365
dpsiM.free();
350366
d2psiM.free();

src/QMCWaveFunctions/Fermion/DiracDeterminant.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ class DiracDeterminant : public DiracDeterminantBase
114114
int ndelay = 1,
115115
DetMatInvertor matrix_inverter_kind = DetMatInvertor::ACCEL);
116116

117+
/// destructor: releases psiM (unless it is attached) and psiM_temp from device copy via the update engine
~DiracDeterminant() override;
118+
117119
// copy constructor and assign operator disabled
118120
DiracDeterminant(const DiracDeterminant& s) = delete;
119121
DiracDeterminant& operator=(const DiracDeterminant& s) = delete;

0 commit comments

Comments
 (0)