Actual source code: segmentedmempool.hpp

  1: #pragma once

  3: #include <petsc/private/deviceimpl.h>

  5: #include <petsc/private/cpp/macros.hpp>
  6: #include <petsc/private/cpp/type_traits.hpp>
  7: #include <petsc/private/cpp/utility.hpp>
  8: #include <petsc/private/cpp/register_finalize.hpp>
  9: #include <petsc/private/cpp/memory.hpp>

 11: #include <limits>
 12: #include <deque>
 13: #include <vector>

 15: namespace Petsc
 16: {

 18: namespace device
 19: {

 21: template <typename T>
 22: class StreamBase {
 23: public:
 24:   using id_type      = int;
 25:   using derived_type = T;

 27:   static const id_type INVALID_ID;

 29:   // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion
 30:   template <typename U = T>
 31:   PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_());

 33:   PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); }

 35:   template <typename E>
 36:   PetscErrorCode record_event(E &&event) const noexcept
 37:   {
 38:     return static_cast<const T &>(*this).record_event_(std::forward<E>(event));
 39:   }

 41:   template <typename E>
 42:   PetscErrorCode wait_for_event(E &&event) const noexcept
 43:   {
 44:     return static_cast<const T &>(*this).wait_for_(std::forward<E>(event));
 45:   }

 47: protected:
 48:   constexpr StreamBase() noexcept = default;

 50:   struct default_event_type { };
 51:   using default_stream_type = std::nullptr_t;

 53:   PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; }

 55:   PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; }

 57:   template <typename U = T>
 58:   static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept
 59:   {
 60:     return PETSC_SUCCESS;
 61:   }

 63:   template <typename U = T>
 64:   static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept
 65:   {
 66:     return PETSC_SUCCESS;
 67:   }
 68: };

 70: template <typename T>
 71: const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1;

 73: struct DefaultStream : StreamBase<DefaultStream> {
 74:   using stream_type = typename StreamBase<DefaultStream>::default_stream_type;
 75:   using id_type     = typename StreamBase<DefaultStream>::id_type;
 76:   using event_type  = typename StreamBase<DefaultStream>::default_event_type;
 77: };

 79: } // namespace device

 81: namespace memory
 82: {

 84: namespace impl
 85: {

 87: // ==========================================================================================
 88: // MemoryChunk
 89: //
 90: // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning
 91: // MemoryBlock and its size/capacity
 92: // ==========================================================================================

 94: template <typename EventType>
 95: class MemoryChunk {
 96: public:
 97:   using event_type = EventType;
 98:   using size_type  = std::size_t;

100:   MemoryChunk(size_type, size_type) noexcept;
101:   explicit MemoryChunk(size_type) noexcept;

103:   MemoryChunk(MemoryChunk &&) noexcept;
104:   MemoryChunk &operator=(MemoryChunk &&) noexcept;

106:   MemoryChunk(const MemoryChunk &) noexcept            = delete;
107:   MemoryChunk &operator=(const MemoryChunk &) noexcept = delete;

109:   PETSC_NODISCARD size_type start() const noexcept { return start_; }
110:   PETSC_NODISCARD size_type size() const noexcept { return size_; }
111:   // REVIEW ME:
112:   // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in
113:   // theory only the last chunk needs to do this
114:   PETSC_NODISCARD size_type capacity() const noexcept { return size_; }
115:   PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); }

117:   template <typename U>
118:   PetscErrorCode release(const device::StreamBase<U> *) noexcept;
119:   template <typename U>
120:   PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept;
121:   template <typename U>
122:   PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept;
123:   PetscErrorCode       resize(size_type) noexcept;
124:   PETSC_NODISCARD bool contains(size_type) const noexcept;

126: private:
127:   event_type      event_{};                                       // event recorded when the chunk was released
128:   bool            open_      = true;                              // is this chunk open?
129:   int             stream_id_ = device::DefaultStream::INVALID_ID; // id of the last stream to use the chunk, populated on release
130:   size_type       size_      = 0;                                 // size of the chunk
131:   const size_type start_     = 0;                                 // offset from the start of the owning block

133:   template <typename U>
134:   PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept;
135: };

137: // ==========================================================================================
138: // MemoryChunk - Private API
139: // ==========================================================================================

141: // asks and answers the question: can this stream claim this chunk without serializing?
142: template <typename E>
143: template <typename U>
144: inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept
145: {
146:   return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id());
147: }

149: // ==========================================================================================
150: // MemoryChunk - Public API
151: // ==========================================================================================

153: template <typename E>
154: inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start)
155: {
156: }

158: template <typename E>
159: inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size)
160: {
161: }

163: template <typename E>
164: inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept :
165:   event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_))
166: {
167: }

169: template <typename E>
170: inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept
171: {
172:   PetscFunctionBegin;
173:   if (this != &other) {
174:     event_     = std::move(other.event_);
175:     open_      = util::exchange(other.open_, false);
176:     stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID);
177:     size_      = util::exchange(other.size_, 0);
178:     start_     = std::move(other.start_);
179:   }
180:   PetscFunctionReturn(*this);
181: }

183: /*
184:   MemoryChunk::release - release a chunk on a stream

186:   Input Parameter:
187: . stream - the stream to release the chunk with

189:   Notes:
190:   Inserts a release operation on stream and records the state of stream at the time this
191:   routine was called.

193:   Future allocation requests which attempt to claim the chunk on the same stream may re-acquire
194:   the chunk without serialization.

196:   If another stream attempts to claim the chunk they must wait for the recorded event before
197:   claiming the chunk.
198: */
199: template <typename E>
200: template <typename U>
201: inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept
202: {
203:   PetscFunctionBegin;
204:   open_      = true;
205:   stream_id_ = stream->get_id();
206:   PetscCall(stream->record_event(event_));
207:   PetscFunctionReturn(PETSC_SUCCESS);
208: }

210: /*
211:   MemoryChunk::claim - attempt to claim a particular chunk

213:   Input Parameters:
214: + stream    - the stream on which to attempt to claim
215: . req_size  - the requested size (in elements) to attempt to claim
216: - serialize - (optional, false) whether the claimant allows serialization

218:   Output Parameter:
219: . success - true if the chunk was claimed, false otherwise
220: */
221: template <typename E>
222: template <typename U>
223: inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept
224: {
225:   PetscFunctionBegin;
226:   if ((*success = can_claim(stream, req_size, serialize))) {
227:     if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_));
228:     PetscCall(resize(req_size));
229:     open_ = false;
230:   }
231:   PetscFunctionReturn(PETSC_SUCCESS);
232: }

234: /*
235:   MemoryChunk::can_claim - test whether a particular chunk can be claimed

237:   Input Parameters:
238: + stream    - the stream on which to attempt to claim
239: . req_size  - the requested size (in elements) to attempt to claim
240: - serialize - whether the claimant allows serialization

242:   Output:
243: . [return] - true if the chunk is claimable given the configuration, false otherwise
244: */
245: template <typename E>
246: template <typename U>
247: inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept
248: {
249:   if (open_ && (req_size <= capacity())) {
250:     // fully compatible
251:     if (stream_compat_(stream)) return true;
252:     // stream wasn't compatible, but could claim if we serialized
253:     if (serialize) return true;
254:     // incompatible stream and did not want to serialize
255:   }
256:   return false;
257: }

259: /*
260:   MemoryChunk::resize - grow a chunk to new size

262:   Input Parameter:
263: . newsize - the new size Requested

265:   Notes:
266:   newsize cannot be larger than capacity
267: */
268: template <typename E>
269: inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept
270: {
271:   PetscFunctionBegin;
272:   PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity());
273:   size_ = newsize;
274:   PetscFunctionReturn(PETSC_SUCCESS);
275: }

277: /*
278:   MemoryChunk::contains - query whether a memory chunk contains a particular offset

280:   Input Parameters:
281: . offset - The offset from the MemoryBlock start

283:   Notes:
284:   Returns true if the chunk contains the offset, false otherwise
285: */
286: template <typename E>
287: inline bool MemoryChunk<E>::contains(size_type offset) const noexcept
288: {
289:   return (offset >= start()) && (offset < total_offset());
290: }

292: // ==========================================================================================
293: // MemoryBlock
294: //
295: // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving
296: // and restoring a block is thread-safe (so may be used by multiple device streams).
297: // ==========================================================================================

299: template <typename T, typename AllocatorType, typename StreamType>
300: class MemoryBlock {
301: public:
302:   using value_type      = T;
303:   using allocator_type  = AllocatorType;
304:   using stream_type     = StreamType;
305:   using event_type      = typename stream_type::event_type;
306:   using chunk_type      = MemoryChunk<event_type>;
307:   using size_type       = typename chunk_type::size_type;
308:   using chunk_list_type = std::vector<chunk_type>;

310:   template <typename U>
311:   MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept;

313:   ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value);

315:   MemoryBlock(MemoryBlock &&) noexcept;
316:   MemoryBlock &operator=(MemoryBlock &&) noexcept;

318:   // memory blocks are not copyable
319:   MemoryBlock(const MemoryBlock &)            = delete;
320:   MemoryBlock &operator=(const MemoryBlock &) = delete;

322:   /* --- actual functions --- */
323:   PetscErrorCode       try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept;
324:   PetscErrorCode       try_deallocate_chunk(T **, const stream_type *, bool *) noexcept;
325:   PetscErrorCode       try_find_chunk(const T *, chunk_type **) noexcept;
326:   PETSC_NODISCARD bool owns_pointer(const T *) const noexcept;

328:   PETSC_NODISCARD size_type size() const noexcept { return size_; }
329:   PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); }
330:   PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); }

332: private:
333:   value_type     *mem_{};
334:   allocator_type *allocator_{};
335:   size_type       size_{};
336:   chunk_list_type chunks_{};

338:   PetscErrorCode clear_(const stream_type *) noexcept;
339: };

341: // ==========================================================================================
342: // MemoryBlock - Private API
343: // ==========================================================================================

345: // clear the memory block, called from destructors and move assignment/construction
346: template <typename T, typename A, typename S>
347: PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept
348: {
349:   PetscFunctionBegin;
350:   if (PetscLikely(mem_)) {
351:     PetscCall(allocator_->deallocate(mem_, stream));
352:     mem_ = nullptr;
353:   }
354:   size_ = 0;
355:   PetscCallCXX(chunks_.clear());
356:   PetscFunctionReturn(PETSC_SUCCESS);
357: }

359: // ==========================================================================================
360: // MemoryBlock - Public API
361: // ==========================================================================================

363: // default constructor, allocates memory immediately
364: template <typename T, typename A, typename S>
365: template <typename U>
366: MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s)
367: {
368:   PetscFunctionBegin;
369:   PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream));
370:   PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s);
371:   PetscFunctionReturnVoid();
372: }

374: template <typename T, typename A, typename S>
375: MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value)
376: {
377:   stream_type stream;

379:   PetscFunctionBegin;
380:   PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
381:   PetscFunctionReturnVoid();
382: }

384: template <typename T, typename A, typename S>
385: MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_))
386: {
387: }

389: template <typename T, typename A, typename S>
390: MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept
391: {
392:   PetscFunctionBegin;
393:   if (this != &other) {
394:     stream_type stream;

396:     PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
397:     mem_       = util::exchange(other.mem_, nullptr);
398:     allocator_ = other.allocator_;
399:     size_      = util::exchange(other.size_, 0);
400:     chunks_    = std::move(other.chunks_);
401:   }
402:   PetscFunctionReturn(*this);
403: }

405: /*
406:   MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise
407: */
408: template <typename T, typename A, typename S>
409: inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept
410: {
411:   // each pool is linear in memory, so it suffices to check the bounds
412:   return (ptr >= mem_) && (ptr < std::next(mem_, size()));
413: }

415: /*
416:   MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock

418:   Input Parameters:
419: + req_size - the requested size of the allocation (in elements)
420: . ptr      - ptr to fill
421: - stream   - stream to fill the pointer on

423:   Output Parameter:
424: . success  - true if chunk was gotten, false otherwise

426:   Notes:
427:   If the current memory could not satisfy the memory request, ptr is unchanged
428: */
429: template <typename T, typename A, typename S>
430: inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept
431: {
432:   PetscFunctionBegin;
433:   *success = false;
434:   if (req_size <= size()) {
435:     const auto try_create_chunk = [&]() {
436:       const auto was_empty     = chunks_.empty();
437:       const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset();

439:       PetscFunctionBegin;
440:       if (block_alloced + req_size <= size()) {
441:         PetscCallCXX(chunks_.emplace_back(block_alloced, req_size));
442:         PetscCall(chunks_.back().claim(stream, req_size, success));
443:         *ptr = mem_ + block_alloced;
444:         if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size());
445:       }
446:       PetscFunctionReturn(PETSC_SUCCESS);
447:     };
448:     const auto try_find_open_chunk = [&](bool serialize = false) {
449:       PetscFunctionBegin;
450:       for (auto &chunk : chunks_) {
451:         PetscCall(chunk.claim(stream, req_size, success, serialize));
452:         if (*success) {
453:           *ptr = mem_ + chunk.start();
454:           break;
455:         }
456:       }
457:       PetscFunctionReturn(PETSC_SUCCESS);
458:     };
459:     const auto try_steal_other_stream_chunk = [&]() {
460:       PetscFunctionBegin;
461:       PetscCall(try_find_open_chunk(true));
462:       PetscFunctionReturn(PETSC_SUCCESS);
463:     };

465:     // search previously distributed chunks, but only claim one if it is on the same stream
466:     // as us
467:     PetscCall(try_find_open_chunk());

469:     // if we are here we couldn't reuse one of our own chunks so check first if the pool
470:     // has room for a new one
471:     if (!*success) PetscCall(try_create_chunk());

473:     // try pruning dead chunks off the back, note we do this regardless of whether we are
474:     // successful
475:     while (chunks_.back().can_claim(stream, 0, false)) {
476:       PetscCallCXX(chunks_.pop_back());
477:       if (chunks_.empty()) {
478:         // if chunks are empty it implies we have managed to claim (and subsequently destroy)
479:         // our own chunk twice! something has gone wrong
480:         PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size());
481:         break;
482:       }
483:     }

485:     // if previously unsuccessful see if enough space has opened up due to pruning. note that
486:     // if the chunk list was emptied from the pruning this call must succeed in allocating a
487:     // chunk, otherwise something is wrong
488:     if (!*success) PetscCall(try_create_chunk());

490:     // last resort, iterate over all chunks and see if we can steal one by waiting on the
491:     // current owner to finish using it
492:     if (!*success) PetscCall(try_steal_other_stream_chunk());
493:   }
494:   PetscFunctionReturn(PETSC_SUCCESS);
495: }

497: /*
498:   MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock

500:   Input Parameters:
501: + ptr     - ptr to restore
502: - stream  - stream to restore the pointer on

504:   Output Parameter:
505: . success - true if chunk was restored, false otherwise

507:   Notes:
508:   ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned
509:   by this MemoryBlock then it is restored on stream. The same stream may receive ptr again
510:   without synchronization, but other streams may not do so until either serializing or the
511:   stream is idle again.
512: */
513: template <typename T, typename A, typename S>
514: inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept
515: {
516:   chunk_type *chunk = nullptr;

518:   PetscFunctionBegin;
519:   PetscCall(try_find_chunk(*ptr, &chunk));
520:   if (chunk) {
521:     PetscCall(chunk->release(stream));
522:     *ptr     = nullptr;
523:     *success = true;
524:   } else {
525:     *success = false;
526:   }
527:   PetscFunctionReturn(PETSC_SUCCESS);
528: }

530: /*
531:   MemoryBlock::try_find_chunk - try to find the chunk which owns ptr

533:   Input Parameter:
534: . ptr - the pointer to look for

536:   Output Parameter:
537: . ret_chunk - pointer to the owning chunk or nullptr if not found
538: */
539: template <typename T, typename A, typename S>
540: inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept
541: {
542:   PetscFunctionBegin;
543:   *ret_chunk = nullptr;
544:   if (owns_pointer(ptr)) {
545:     const auto offset = static_cast<size_type>(ptr - mem_);

547:     for (auto &chunk : chunks_) {
548:       if (chunk.contains(offset)) {
549:         *ret_chunk = &chunk;
550:         break;
551:       }
552:     }

554:     PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size())));
555:   }
556:   PetscFunctionReturn(PETSC_SUCCESS);
557: }

559: namespace detail
560: {

562: template <typename T>
563: struct real_type {
564:   using type = T;
565: };

567: template <>
568: struct real_type<PetscScalar> {
569:   using type = PetscReal;
570: };

572: } // namespace detail

574: template <typename T>
575: struct SegmentedMemoryPoolAllocatorBase {
576:   using value_type      = T;
577:   using size_type       = std::size_t;
578:   using real_value_type = typename detail::real_type<T>::type;

580:   template <typename U>
581:   static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept;
582:   template <typename U>
583:   static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept;
584:   template <typename U>
585:   static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept;
586:   template <typename U>
587:   static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept;
588:   template <typename U>
589:   static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept;
590: };

592: template <typename T>
593: template <typename U>
594: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept
595: {
596:   PetscFunctionBegin;
597:   PetscCall(PetscMalloc1(n, ptr));
598:   PetscFunctionReturn(PETSC_SUCCESS);
599: }

601: template <typename T>
602: template <typename U>
603: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept
604: {
605:   PetscFunctionBegin;
606:   PetscCall(PetscFree(ptr));
607:   PetscFunctionReturn(PETSC_SUCCESS);
608: }

610: template <typename T>
611: template <typename U>
612: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
613: {
614:   PetscFunctionBegin;
615:   PetscCall(PetscArrayzero(ptr, n));
616:   PetscFunctionReturn(PETSC_SUCCESS);
617: }

619: template <typename T>
620: template <typename U>
621: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept
622: {
623:   PetscFunctionBegin;
624:   PetscCall(PetscArraycpy(dest, src, n));
625:   PetscFunctionReturn(PETSC_SUCCESS);
626: }

628: template <typename T>
629: template <typename U>
630: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
631: {
632:   using limit_type            = std::numeric_limits<real_value_type>;
633:   constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max();

635:   PetscFunctionBegin;
636:   for (size_type i = 0; i < n; ++i) ptr[i] = canary;
637:   PetscFunctionReturn(PETSC_SUCCESS);
638: }

640: } // namespace impl

642: // ==========================================================================================
643: // SegmentedMemoryPool
644: //
645: // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an
646: // allocated buffer. This buffer is further split into memory "chunks" which control
647: // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states:
648: //
649: // 1. Open:
650: //    The chunk is free to be claimed by the next suitable allocation request. If the
651: //    allocation request is made on the same stream as the chunk was deallocated on, no
652: //    serialization needs to occur. If not, the allocating stream must wait for the
653: //    event. Claiming the chunk "closes" the chunk.
654: //
655: // 2. Closed:
656: //    The chunk has been claimed by an allocation request. It cannot be opened again until it
657: //    is deallocated; doing so "opens" the chunk.
658: //
659: // Note that there does not need to be a chunk for every region, chunks are created to satisfy
660: // an allocation request.
661: //
662: // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may
663: // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation
664: // request. This region exists _only_ at the end, as there are no gaps between chunks.
665: //
666: //
667: // |-----------------------------------------------------------------------------------------
668: // | SegmentedMemoryPool
669: // |
670: // | ||-------------||
671: // | ||             ||    -------------------------------------------------------------------
672: // | ||             ||    | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX...
673: // | ||             ||    | |             |      |                   |            |
674: // | ||             ||    | x-----x-------x-----xx---------x---------x------x-----x
675: // | || MemoryBlock || -> | ------|-------------|----------|----------------|--------
676: // | ||             ||    | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk |
677: // | ||             ||    | ---------------------------------------------------------
678: // | ||             ||    -------------------------------------------------------------------
679: // | ||-------------||
680: // | ||             ||
681: // | ||     ...     ||
682: // | ||             ||
683: // ==========================================================================================

685: template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256>
686: class SegmentedMemoryPool;

688: // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks.
689: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
690: class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> {
691: public:
692:   using value_type     = MemType;
693:   using stream_type    = StreamType;
694:   using allocator_type = AllocType;
695:   using block_type     = impl::MemoryBlock<value_type, allocator_type, stream_type>;
696:   using pool_type      = std::deque<block_type>;
697:   using size_type      = typename block_type::size_type;

699:   explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value);

701:   PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept;
702:   PetscErrorCode deallocate(value_type **, const stream_type *) noexcept;
703:   PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept;

705: private:
706:   pool_type      pool_;
707:   allocator_type allocator_;
708:   size_type      chunk_size_;

710:   PetscErrorCode make_block_(size_type, const stream_type *) noexcept;

712:   friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>;
713:   PetscErrorCode register_finalize_(const stream_type *) noexcept;
714:   PetscErrorCode finalize_() noexcept;

716:   PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept;
717: };

719: // ==========================================================================================
720: // SegmentedMemoryPool - Private API
721: // ==========================================================================================

723: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
724: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept
725: {
726:   const auto block_size = std::max(size, chunk_size_);

728:   PetscFunctionBegin;
729:   PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream));
730:   PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size()));
731:   PetscFunctionReturn(PETSC_SUCCESS);
732: }

734: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
735: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept
736: {
737:   PetscFunctionBegin;
738:   PetscCall(make_block_(chunk_size_, stream));
739:   PetscFunctionReturn(PETSC_SUCCESS);
740: }

742: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
743: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept
744: {
745:   PetscFunctionBegin;
746:   PetscCallCXX(pool_.clear());
747:   chunk_size_ = DefaultChunkSize;
748:   PetscFunctionReturn(PETSC_SUCCESS);
749: }

751: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
752: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept
753: {
754:   auto found = false;

756:   PetscFunctionBegin;
757:   PetscCall(this->register_finalize(stream));
758:   for (auto &block : pool_) {
759:     PetscCall(block.try_allocate_chunk(size, ptr, stream, &found));
760:     if (PetscLikely(found)) PetscFunctionReturn(PETSC_SUCCESS);
761:   }

763:   PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size));
764:   // if we are here we couldn't find an open block in the pool, so make a new block
765:   PetscCall(make_block_(size, stream));
766:   // and assign it
767:   PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found));
768:   PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size());
769:   PetscFunctionReturn(PETSC_SUCCESS);
770: }

772: // ==========================================================================================
773: // SegmentedMemoryPool - Public API
774: // ==========================================================================================

776: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
777: inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size)
778: {
779: }

781: /*
782:   SegmentedMemoryPool::allocate - get an allocation from the memory pool

784:   Input Parameters:
785: + req_size - size (in elements) to get
786: . ptr      - the pointer to hold the allocation
787: - stream   - the stream on which to get the allocation

789:   Output Parameter:
790: . ptr - the pointer holding the allocation

792:   Notes:
793:   req_size cannot be negative. If req_size if zero, ptr is set to nullptr
794: */
795: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
796: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept
797: {
798:   value_type *ret_ptr = nullptr;

800:   PetscFunctionBegin;
801:   PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size);
802:   PetscAssertPointer(ptr, 2);
803:   PetscAssertPointer(stream, 3);
804:   if (req_size) {
805:     const auto size         = static_cast<size_type>(req_size);
806:     auto       aligned_size = alignment == alignof(char) ? size : size + alignment;
807:     void      *vptr         = nullptr;

809:     PetscCall(allocate_(aligned_size, &ret_ptr, stream));
810:     vptr = ret_ptr;
811:     std::align(alignment, size, vptr, aligned_size);
812:     ret_ptr = reinterpret_cast<value_type *>(vptr);
813:     // sets memory to NaN or infinity depending on the type to catch out uninitialized memory
814:     // accesses.
815:     if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream));
816:   }
817:   *ptr = ret_ptr;
818:   PetscFunctionReturn(PETSC_SUCCESS);
819: }

821: /*
822:   SegmentedMemoryPool::deallocate - release a pointer back to the memory pool

824:   Input Parameters:
825: + ptr    - the pointer to release
826: - stream - the stream to release it on

828:   Notes:
829:   If ptr is not owned by the pool it is unchanged.
830: */
831: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
832: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept
833: {
834:   PetscFunctionBegin;
835:   PetscAssertPointer(ptr, 1);
836:   PetscAssertPointer(stream, 2);
837:   // nobody owns a nullptr, and if they do then they have bigger problems
838:   if (!*ptr) PetscFunctionReturn(PETSC_SUCCESS);
839:   for (auto &block : pool_) {
840:     auto found = false;

842:     PetscCall(block.try_deallocate_chunk(ptr, stream, &found));
843:     if (PetscLikely(found)) break;
844:   }
845:   PetscFunctionReturn(PETSC_SUCCESS);
846: }

848: /*
849:   SegmentedMemoryPool::reallocate - Resize an allocated buffer

851:   Input Parameters:
852: + new_req_size - the new buffer size
853: . ptr          - pointer to the buffer
854: - stream       - stream to resize with

856:   Output Parameter:
857: . ptr - pointer to the new region

859:   Notes:
860:   ptr must have been allocated by the pool.

862:   It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated).
863: */
864: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
865: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept
866: {
867:   using chunk_type = typename block_type::chunk_type;

869:   const auto  new_size = static_cast<size_type>(new_req_size);
870:   const auto  old_ptr  = *ptr;
871:   chunk_type *chunk    = nullptr;

873:   PetscFunctionBegin;
874:   PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size);
875:   PetscAssertPointer(ptr, 2);
876:   PetscAssertPointer(stream, 3);

878:   // if reallocating to zero, just free
879:   if (PetscUnlikely(new_size == 0)) {
880:     PetscCall(deallocate(ptr, stream));
881:     PetscFunctionReturn(PETSC_SUCCESS);
882:   }

884:   // search the blocks for the owning chunk
885:   for (auto &block : pool_) {
886:     PetscCall(block.try_find_chunk(old_ptr, &chunk));
887:     if (chunk) break; // found
888:   }
889:   PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr);

891:   if (chunk->capacity() < new_size) {
892:     // chunk does not have enough room, need to grab a fresh chunk and copy to it
893:     *ptr = nullptr;
894:     PetscCall(chunk->release(stream));
895:     PetscCall(allocate(new_size, ptr, stream));
896:     PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream));
897:   } else {
898:     // chunk had enough room we can simply grow (or shrink) to fit the new size
899:     PetscCall(chunk->resize(new_size));
900:   }
901:   PetscFunctionReturn(PETSC_SUCCESS);
902: }

904: } // namespace memory

906: } // namespace Petsc