Actual source code: cupmallocator.hpp

  1: #pragma once

  3: #include <petsc/private/cpp/object_pool.hpp>

  5: #include "../segmentedmempool.hpp"
  6: #include "cupmthrustutility.hpp"

  8: #include <thrust/device_ptr.h>
  9: #include <thrust/fill.h>

 11: #include <limits> // std::numeric_limits

 13: namespace Petsc
 14: {

 16: namespace device
 17: {

 19: namespace cupm
 20: {

 22: // ==========================================================================================
 23: // CUPM Host Allocator
 24: // ==========================================================================================

 26: template <DeviceType T, typename PetscType = char>
 27: class HostAllocator;

 29: // Allocator class to allocate pinned host memory for use with device
 30: template <DeviceType T, typename PetscType>
 31: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL HostAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
 32: public:
 33:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
 34:   using base_type       = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
 35:   using real_value_type = typename base_type::real_value_type;
 36:   using size_type       = typename base_type::size_type;
 37:   using value_type      = typename base_type::value_type;

 39:   template <typename U>
 40:   static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
 41:   template <typename U>
 42:   static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
 43:   template <typename U>
 44:   static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
 45: };

 47: template <DeviceType T, typename P>
 48: template <typename U>
 49: inline PetscErrorCode HostAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *) noexcept
 50: {
 51:   PetscFunctionBegin;
 52:   PetscCall(PetscCUPMMallocHost(ptr, n));
 53:   PetscFunctionReturn(PETSC_SUCCESS);
 54: }

 56: template <DeviceType T, typename P>
 57: template <typename U>
 58: inline PetscErrorCode HostAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *) noexcept
 59: {
 60:   PetscFunctionBegin;
 61:   PetscCallCUPM(cupmFreeHost(ptr));
 62:   PetscFunctionReturn(PETSC_SUCCESS);
 63: }

 65: template <DeviceType T, typename P>
 66: template <typename U>
 67: inline PetscErrorCode HostAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
 68: {
 69:   PetscFunctionBegin;
 70:   PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyHostToHost, stream->get_stream(), true));
 71:   PetscFunctionReturn(PETSC_SUCCESS);
 72: }

 74: // ==========================================================================================
 75: // CUPM Device Allocator
 76: // ==========================================================================================

 78: template <DeviceType T, typename PetscType = char>
 79: class DeviceAllocator;

 81: template <DeviceType T, typename PetscType>
 82: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL DeviceAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
 83: public:
 84:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
 85:   using base_type       = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
 86:   using real_value_type = typename base_type::real_value_type;
 87:   using size_type       = typename base_type::size_type;
 88:   using value_type      = typename base_type::value_type;

 90:   template <typename U>
 91:   static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
 92:   template <typename U>
 93:   static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
 94:   template <typename U>
 95:   static PetscErrorCode zero(value_type *, size_type, const StreamBase<U> *) noexcept;
 96:   template <typename U>
 97:   static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
 98:   template <typename U>
 99:   static PetscErrorCode set_canary(value_type *, size_type, const StreamBase<U> *) noexcept;
100: };

102: template <DeviceType T, typename P>
103: template <typename U>
104: inline PetscErrorCode DeviceAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *stream) noexcept
105: {
106:   PetscFunctionBegin;
107:   PetscCall(PetscCUPMMallocAsync(ptr, n, stream->get_stream()));
108:   PetscFunctionReturn(PETSC_SUCCESS);
109: }

111: template <DeviceType T, typename P>
112: template <typename U>
113: inline PetscErrorCode DeviceAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *stream) noexcept
114: {
115:   PetscFunctionBegin;
116:   PetscCallCUPM(cupmFreeAsync(ptr, stream->get_stream()));
117:   PetscFunctionReturn(PETSC_SUCCESS);
118: }

120: template <DeviceType T, typename P>
121: template <typename U>
122: inline PetscErrorCode DeviceAllocator<T, P>::zero(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
123: {
124:   PetscFunctionBegin;
125:   PetscCall(PetscCUPMMemsetAsync(ptr, 0, n, stream->get_stream(), true));
126:   PetscFunctionReturn(PETSC_SUCCESS);
127: }

129: template <DeviceType T, typename P>
130: template <typename U>
131: inline PetscErrorCode DeviceAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
132: {
133:   PetscFunctionBegin;
134:   PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyDeviceToDevice, stream->get_stream(), true));
135:   PetscFunctionReturn(PETSC_SUCCESS);
136: }

138: template <DeviceType T, typename P>
139: template <typename U>
140: inline PetscErrorCode DeviceAllocator<T, P>::set_canary(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
141: {
142:   using limit_t           = std::numeric_limits<real_value_type>;
143:   const value_type canary = limit_t::has_signaling_NaN ? limit_t::signaling_NaN() : limit_t::max();
144:   const auto       xptr   = thrust::device_pointer_cast(ptr);

146:   PetscFunctionBegin;
147:   PetscCallThrust(THRUST_CALL(thrust::fill, stream->get_stream(), xptr, xptr + n, canary));
148:   PetscFunctionReturn(PETSC_SUCCESS);
149: }

151: } // namespace cupm

153: } // namespace device

155: } // namespace Petsc