Actual source code: segmentedmempool.hpp
1: #pragma once
3: #include <petsc/private/deviceimpl.h>
5: #include <petsc/private/cpp/macros.hpp>
6: #include <petsc/private/cpp/type_traits.hpp>
7: #include <petsc/private/cpp/utility.hpp>
8: #include <petsc/private/cpp/register_finalize.hpp>
9: #include <petsc/private/cpp/memory.hpp>
11: #include <limits>
12: #include <deque>
13: #include <vector>
15: namespace Petsc
16: {
18: namespace device
19: {
21: template <typename T>
22: class StreamBase {
23: public:
24: using id_type = int;
25: using derived_type = T;
27: static const id_type INVALID_ID;
29: // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion
30: template <typename U = T>
31: PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_());
33: PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); }
35: template <typename E>
36: PetscErrorCode record_event(E &&event) const noexcept
37: {
38: return static_cast<const T &>(*this).record_event_(std::forward<E>(event));
39: }
41: template <typename E>
42: PetscErrorCode wait_for_event(E &&event) const noexcept
43: {
44: return static_cast<const T &>(*this).wait_for_(std::forward<E>(event));
45: }
47: protected:
48: constexpr StreamBase() noexcept = default;
50: struct default_event_type { };
51: using default_stream_type = std::nullptr_t;
53: PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; }
55: PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; }
57: template <typename U = T>
58: static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept
59: {
60: return PETSC_SUCCESS;
61: }
63: template <typename U = T>
64: static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept
65: {
66: return PETSC_SUCCESS;
67: }
68: };
70: template <typename T>
71: const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1;
73: struct DefaultStream : StreamBase<DefaultStream> {
74: using stream_type = typename StreamBase<DefaultStream>::default_stream_type;
75: using id_type = typename StreamBase<DefaultStream>::id_type;
76: using event_type = typename StreamBase<DefaultStream>::default_event_type;
77: };
79: } // namespace device
81: namespace memory
82: {
84: namespace impl
85: {
87: // ==========================================================================================
88: // MemoryChunk
89: //
90: // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning
91: // MemoryBlock and its size/capacity
92: // ==========================================================================================
94: template <typename EventType>
95: class MemoryChunk {
96: public:
97: using event_type = EventType;
98: using size_type = std::size_t;
100: MemoryChunk(size_type, size_type) noexcept;
101: explicit MemoryChunk(size_type) noexcept;
103: MemoryChunk(MemoryChunk &&) noexcept;
104: MemoryChunk &operator=(MemoryChunk &&) noexcept;
106: MemoryChunk(const MemoryChunk &) noexcept = delete;
107: MemoryChunk &operator=(const MemoryChunk &) noexcept = delete;
109: PETSC_NODISCARD size_type start() const noexcept { return start_; }
110: PETSC_NODISCARD size_type size() const noexcept { return size_; }
111: // REVIEW ME:
112: // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in
113: // theory only the last chunk needs to do this
114: PETSC_NODISCARD size_type capacity() const noexcept { return size_; }
115: PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); }
117: template <typename U>
118: PetscErrorCode release(const device::StreamBase<U> *) noexcept;
119: template <typename U>
120: PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept;
121: template <typename U>
122: PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept;
123: PetscErrorCode resize(size_type) noexcept;
124: PETSC_NODISCARD bool contains(size_type) const noexcept;
126: private:
127: event_type event_{}; // event recorded when the chunk was released
128: bool open_ = true; // is this chunk open?
129: int stream_id_ = device::DefaultStream::INVALID_ID; // id of the last stream to use the chunk, populated on release
130: size_type size_ = 0; // size of the chunk
131: const size_type start_ = 0; // offset from the start of the owning block
133: template <typename U>
134: PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept;
135: };
137: // ==========================================================================================
138: // MemoryChunk - Private API
139: // ==========================================================================================
141: // asks and answers the question: can this stream claim this chunk without serializing?
142: template <typename E>
143: template <typename U>
144: inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept
145: {
146: return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id());
147: }
149: // ==========================================================================================
150: // MemoryChunk - Public API
151: // ==========================================================================================
153: template <typename E>
154: inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start)
155: {
156: }
158: template <typename E>
159: inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size)
160: {
161: }
163: template <typename E>
164: inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept :
165: event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_))
166: {
167: }
169: template <typename E>
170: inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept
171: {
172: PetscFunctionBegin;
173: if (this != &other) {
174: event_ = std::move(other.event_);
175: open_ = util::exchange(other.open_, false);
176: stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID);
177: size_ = util::exchange(other.size_, 0);
178: start_ = std::move(other.start_);
179: }
180: PetscFunctionReturn(*this);
181: }
183: /*
184: MemoryChunk::release - release a chunk on a stream
186: Input Parameter:
187: . stream - the stream to release the chunk with
189: Notes:
190: Inserts a release operation on stream and records the state of stream at the time this
191: routine was called.
193: Future allocation requests which attempt to claim the chunk on the same stream may re-acquire
194: the chunk without serialization.
196: If another stream attempts to claim the chunk they must wait for the recorded event before
197: claiming the chunk.
198: */
199: template <typename E>
200: template <typename U>
201: inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept
202: {
203: PetscFunctionBegin;
204: open_ = true;
205: stream_id_ = stream->get_id();
206: PetscCall(stream->record_event(event_));
207: PetscFunctionReturn(PETSC_SUCCESS);
208: }
210: /*
211: MemoryChunk::claim - attempt to claim a particular chunk
213: Input Parameters:
214: + stream - the stream on which to attempt to claim
215: . req_size - the requested size (in elements) to attempt to claim
216: - serialize - (optional, false) whether the claimant allows serialization
218: Output Parameter:
219: . success - true if the chunk was claimed, false otherwise
220: */
221: template <typename E>
222: template <typename U>
223: inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept
224: {
225: PetscFunctionBegin;
226: if ((*success = can_claim(stream, req_size, serialize))) {
227: if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_));
228: PetscCall(resize(req_size));
229: open_ = false;
230: }
231: PetscFunctionReturn(PETSC_SUCCESS);
232: }
234: /*
235: MemoryChunk::can_claim - test whether a particular chunk can be claimed
237: Input Parameters:
238: + stream - the stream on which to attempt to claim
239: . req_size - the requested size (in elements) to attempt to claim
240: - serialize - whether the claimant allows serialization
242: Output:
243: . [return] - true if the chunk is claimable given the configuration, false otherwise
244: */
245: template <typename E>
246: template <typename U>
247: inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept
248: {
249: if (open_ && (req_size <= capacity())) {
250: // fully compatible
251: if (stream_compat_(stream)) return true;
252: // stream wasn't compatible, but could claim if we serialized
253: if (serialize) return true;
254: // incompatible stream and did not want to serialize
255: }
256: return false;
257: }
259: /*
260: MemoryChunk::resize - grow a chunk to new size
262: Input Parameter:
263: . newsize - the new size Requested
265: Notes:
266: newsize cannot be larger than capacity
267: */
268: template <typename E>
269: inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept
270: {
271: PetscFunctionBegin;
272: PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity());
273: size_ = newsize;
274: PetscFunctionReturn(PETSC_SUCCESS);
275: }
277: /*
278: MemoryChunk::contains - query whether a memory chunk contains a particular offset
280: Input Parameters:
281: . offset - The offset from the MemoryBlock start
283: Notes:
284: Returns true if the chunk contains the offset, false otherwise
285: */
286: template <typename E>
287: inline bool MemoryChunk<E>::contains(size_type offset) const noexcept
288: {
289: return (offset >= start()) && (offset < total_offset());
290: }
292: // ==========================================================================================
293: // MemoryBlock
294: //
295: // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving
296: // and restoring a block is thread-safe (so may be used by multiple device streams).
297: // ==========================================================================================
299: template <typename T, typename AllocatorType, typename StreamType>
300: class MemoryBlock {
301: public:
302: using value_type = T;
303: using allocator_type = AllocatorType;
304: using stream_type = StreamType;
305: using event_type = typename stream_type::event_type;
306: using chunk_type = MemoryChunk<event_type>;
307: using size_type = typename chunk_type::size_type;
308: using chunk_list_type = std::vector<chunk_type>;
310: template <typename U>
311: MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept;
313: ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value);
315: MemoryBlock(MemoryBlock &&) noexcept;
316: MemoryBlock &operator=(MemoryBlock &&) noexcept;
318: // memory blocks are not copyable
319: MemoryBlock(const MemoryBlock &) = delete;
320: MemoryBlock &operator=(const MemoryBlock &) = delete;
322: /* --- actual functions --- */
323: PetscErrorCode try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept;
324: PetscErrorCode try_deallocate_chunk(T **, const stream_type *, bool *) noexcept;
325: PetscErrorCode try_find_chunk(const T *, chunk_type **) noexcept;
326: PETSC_NODISCARD bool owns_pointer(const T *) const noexcept;
328: PETSC_NODISCARD size_type size() const noexcept { return size_; }
329: PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); }
330: PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); }
332: private:
333: value_type *mem_{};
334: allocator_type *allocator_{};
335: size_type size_{};
336: chunk_list_type chunks_{};
338: PetscErrorCode clear_(const stream_type *) noexcept;
339: };
341: // ==========================================================================================
342: // MemoryBlock - Private API
343: // ==========================================================================================
345: // clear the memory block, called from destructors and move assignment/construction
346: template <typename T, typename A, typename S>
347: PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept
348: {
349: PetscFunctionBegin;
350: if (PetscLikely(mem_)) {
351: PetscCall(allocator_->deallocate(mem_, stream));
352: mem_ = nullptr;
353: }
354: size_ = 0;
355: PetscCallCXX(chunks_.clear());
356: PetscFunctionReturn(PETSC_SUCCESS);
357: }
359: // ==========================================================================================
360: // MemoryBlock - Public API
361: // ==========================================================================================
363: // default constructor, allocates memory immediately
364: template <typename T, typename A, typename S>
365: template <typename U>
366: MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s)
367: {
368: PetscFunctionBegin;
369: PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream));
370: PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s);
371: PetscFunctionReturnVoid();
372: }
374: template <typename T, typename A, typename S>
375: MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value)
376: {
377: stream_type stream;
379: PetscFunctionBegin;
380: PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
381: PetscFunctionReturnVoid();
382: }
384: template <typename T, typename A, typename S>
385: MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_))
386: {
387: }
389: template <typename T, typename A, typename S>
390: MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept
391: {
392: PetscFunctionBegin;
393: if (this != &other) {
394: stream_type stream;
396: PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
397: mem_ = util::exchange(other.mem_, nullptr);
398: allocator_ = other.allocator_;
399: size_ = util::exchange(other.size_, 0);
400: chunks_ = std::move(other.chunks_);
401: }
402: PetscFunctionReturn(*this);
403: }
405: /*
406: MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise
407: */
408: template <typename T, typename A, typename S>
409: inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept
410: {
411: // each pool is linear in memory, so it suffices to check the bounds
412: return (ptr >= mem_) && (ptr < std::next(mem_, size()));
413: }
415: /*
416: MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock
418: Input Parameters:
419: + req_size - the requested size of the allocation (in elements)
420: . ptr - ptr to fill
421: - stream - stream to fill the pointer on
423: Output Parameter:
424: . success - true if chunk was gotten, false otherwise
426: Notes:
427: If the current memory could not satisfy the memory request, ptr is unchanged
428: */
429: template <typename T, typename A, typename S>
430: inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept
431: {
432: PetscFunctionBegin;
433: *success = false;
434: if (req_size <= size()) {
435: const auto try_create_chunk = [&]() {
436: const auto was_empty = chunks_.empty();
437: const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset();
439: PetscFunctionBegin;
440: if (block_alloced + req_size <= size()) {
441: PetscCallCXX(chunks_.emplace_back(block_alloced, req_size));
442: PetscCall(chunks_.back().claim(stream, req_size, success));
443: *ptr = mem_ + block_alloced;
444: if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size());
445: }
446: PetscFunctionReturn(PETSC_SUCCESS);
447: };
448: const auto try_find_open_chunk = [&](bool serialize = false) {
449: PetscFunctionBegin;
450: for (auto &chunk : chunks_) {
451: PetscCall(chunk.claim(stream, req_size, success, serialize));
452: if (*success) {
453: *ptr = mem_ + chunk.start();
454: break;
455: }
456: }
457: PetscFunctionReturn(PETSC_SUCCESS);
458: };
459: const auto try_steal_other_stream_chunk = [&]() {
460: PetscFunctionBegin;
461: PetscCall(try_find_open_chunk(true));
462: PetscFunctionReturn(PETSC_SUCCESS);
463: };
465: // search previously distributed chunks, but only claim one if it is on the same stream
466: // as us
467: PetscCall(try_find_open_chunk());
469: // if we are here we couldn't reuse one of our own chunks so check first if the pool
470: // has room for a new one
471: if (!*success) PetscCall(try_create_chunk());
473: // try pruning dead chunks off the back, note we do this regardless of whether we are
474: // successful
475: while (chunks_.back().can_claim(stream, 0, false)) {
476: PetscCallCXX(chunks_.pop_back());
477: if (chunks_.empty()) {
478: // if chunks are empty it implies we have managed to claim (and subsequently destroy)
479: // our own chunk twice! something has gone wrong
480: PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size());
481: break;
482: }
483: }
485: // if previously unsuccessful see if enough space has opened up due to pruning. note that
486: // if the chunk list was emptied from the pruning this call must succeed in allocating a
487: // chunk, otherwise something is wrong
488: if (!*success) PetscCall(try_create_chunk());
490: // last resort, iterate over all chunks and see if we can steal one by waiting on the
491: // current owner to finish using it
492: if (!*success) PetscCall(try_steal_other_stream_chunk());
493: }
494: PetscFunctionReturn(PETSC_SUCCESS);
495: }
497: /*
498: MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock
500: Input Parameters:
501: + ptr - ptr to restore
502: - stream - stream to restore the pointer on
504: Output Parameter:
505: . success - true if chunk was restored, false otherwise
507: Notes:
508: ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned
509: by this MemoryBlock then it is restored on stream. The same stream may receive ptr again
510: without synchronization, but other streams may not do so until either serializing or the
511: stream is idle again.
512: */
513: template <typename T, typename A, typename S>
514: inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept
515: {
516: chunk_type *chunk = nullptr;
518: PetscFunctionBegin;
519: PetscCall(try_find_chunk(*ptr, &chunk));
520: if (chunk) {
521: PetscCall(chunk->release(stream));
522: *ptr = nullptr;
523: *success = true;
524: } else {
525: *success = false;
526: }
527: PetscFunctionReturn(PETSC_SUCCESS);
528: }
530: /*
531: MemoryBlock::try_find_chunk - try to find the chunk which owns ptr
533: Input Parameter:
534: . ptr - the pointer to look for
536: Output Parameter:
537: . ret_chunk - pointer to the owning chunk or nullptr if not found
538: */
539: template <typename T, typename A, typename S>
540: inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept
541: {
542: PetscFunctionBegin;
543: *ret_chunk = nullptr;
544: if (owns_pointer(ptr)) {
545: const auto offset = static_cast<size_type>(ptr - mem_);
547: for (auto &chunk : chunks_) {
548: if (chunk.contains(offset)) {
549: *ret_chunk = &chunk;
550: break;
551: }
552: }
554: PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size())));
555: }
556: PetscFunctionReturn(PETSC_SUCCESS);
557: }
559: namespace detail
560: {
562: template <typename T>
563: struct real_type {
564: using type = T;
565: };
567: template <>
568: struct real_type<PetscScalar> {
569: using type = PetscReal;
570: };
572: } // namespace detail
574: template <typename T>
575: struct SegmentedMemoryPoolAllocatorBase {
576: using value_type = T;
577: using size_type = std::size_t;
578: using real_value_type = typename detail::real_type<T>::type;
580: template <typename U>
581: static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept;
582: template <typename U>
583: static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept;
584: template <typename U>
585: static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept;
586: template <typename U>
587: static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept;
588: template <typename U>
589: static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept;
590: };
592: template <typename T>
593: template <typename U>
594: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept
595: {
596: PetscFunctionBegin;
597: PetscCall(PetscMalloc1(n, ptr));
598: PetscFunctionReturn(PETSC_SUCCESS);
599: }
601: template <typename T>
602: template <typename U>
603: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept
604: {
605: PetscFunctionBegin;
606: PetscCall(PetscFree(ptr));
607: PetscFunctionReturn(PETSC_SUCCESS);
608: }
610: template <typename T>
611: template <typename U>
612: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
613: {
614: PetscFunctionBegin;
615: PetscCall(PetscArrayzero(ptr, n));
616: PetscFunctionReturn(PETSC_SUCCESS);
617: }
619: template <typename T>
620: template <typename U>
621: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept
622: {
623: PetscFunctionBegin;
624: PetscCall(PetscArraycpy(dest, src, n));
625: PetscFunctionReturn(PETSC_SUCCESS);
626: }
628: template <typename T>
629: template <typename U>
630: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
631: {
632: using limit_type = std::numeric_limits<real_value_type>;
633: constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max();
635: PetscFunctionBegin;
636: for (size_type i = 0; i < n; ++i) ptr[i] = canary;
637: PetscFunctionReturn(PETSC_SUCCESS);
638: }
640: } // namespace impl
642: // ==========================================================================================
643: // SegmentedMemoryPool
644: //
645: // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an
646: // allocated buffer. This buffer is further split into memory "chunks" which control
647: // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states:
648: //
649: // 1. Open:
650: // The chunk is free to be claimed by the next suitable allocation request. If the
651: // allocation request is made on the same stream as the chunk was deallocated on, no
652: // serialization needs to occur. If not, the allocating stream must wait for the
653: // event. Claiming the chunk "closes" the chunk.
654: //
655: // 2. Closed:
656: // The chunk has been claimed by an allocation request. It cannot be opened again until it
657: // is deallocated; doing so "opens" the chunk.
658: //
659: // Note that there does not need to be a chunk for every region, chunks are created to satisfy
660: // an allocation request.
661: //
662: // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may
663: // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation
664: // request. This region exists _only_ at the end, as there are no gaps between chunks.
665: //
666: //
667: // |-----------------------------------------------------------------------------------------
668: // | SegmentedMemoryPool
669: // |
670: // | ||-------------||
671: // | || || -------------------------------------------------------------------
672: // | || || | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX...
673: // | || || | | | | | |
674: // | || || | x-----x-------x-----xx---------x---------x------x-----x
675: // | || MemoryBlock || -> | ------|-------------|----------|----------------|--------
676: // | || || | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk |
677: // | || || | ---------------------------------------------------------
678: // | || || -------------------------------------------------------------------
679: // | ||-------------||
680: // | || ||
681: // | || ... ||
682: // | || ||
683: // ==========================================================================================
685: template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256>
686: class SegmentedMemoryPool;
688: // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks.
689: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
690: class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> {
691: public:
692: using value_type = MemType;
693: using stream_type = StreamType;
694: using allocator_type = AllocType;
695: using block_type = impl::MemoryBlock<value_type, allocator_type, stream_type>;
696: using pool_type = std::deque<block_type>;
697: using size_type = typename block_type::size_type;
699: explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value);
701: PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept;
702: PetscErrorCode deallocate(value_type **, const stream_type *) noexcept;
703: PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept;
705: private:
706: pool_type pool_;
707: allocator_type allocator_;
708: size_type chunk_size_;
710: PetscErrorCode make_block_(size_type, const stream_type *) noexcept;
712: friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>;
713: PetscErrorCode register_finalize_(const stream_type *) noexcept;
714: PetscErrorCode finalize_() noexcept;
716: PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept;
717: };
719: // ==========================================================================================
720: // SegmentedMemoryPool - Private API
721: // ==========================================================================================
723: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
724: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept
725: {
726: const auto block_size = std::max(size, chunk_size_);
728: PetscFunctionBegin;
729: PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream));
730: PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size()));
731: PetscFunctionReturn(PETSC_SUCCESS);
732: }
734: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
735: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept
736: {
737: PetscFunctionBegin;
738: PetscCall(make_block_(chunk_size_, stream));
739: PetscFunctionReturn(PETSC_SUCCESS);
740: }
742: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
743: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept
744: {
745: PetscFunctionBegin;
746: PetscCallCXX(pool_.clear());
747: chunk_size_ = DefaultChunkSize;
748: PetscFunctionReturn(PETSC_SUCCESS);
749: }
751: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
752: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept
753: {
754: auto found = false;
756: PetscFunctionBegin;
757: PetscCall(this->register_finalize(stream));
758: for (auto &block : pool_) {
759: PetscCall(block.try_allocate_chunk(size, ptr, stream, &found));
760: if (PetscLikely(found)) PetscFunctionReturn(PETSC_SUCCESS);
761: }
763: PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size));
764: // if we are here we couldn't find an open block in the pool, so make a new block
765: PetscCall(make_block_(size, stream));
766: // and assign it
767: PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found));
768: PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size());
769: PetscFunctionReturn(PETSC_SUCCESS);
770: }
772: // ==========================================================================================
773: // SegmentedMemoryPool - Public API
774: // ==========================================================================================
776: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
777: inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size)
778: {
779: }
781: /*
782: SegmentedMemoryPool::allocate - get an allocation from the memory pool
784: Input Parameters:
785: + req_size - size (in elements) to get
786: . ptr - the pointer to hold the allocation
787: - stream - the stream on which to get the allocation
789: Output Parameter:
790: . ptr - the pointer holding the allocation
792: Notes:
793: req_size cannot be negative. If req_size if zero, ptr is set to nullptr
794: */
795: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
796: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept
797: {
798: value_type *ret_ptr = nullptr;
800: PetscFunctionBegin;
801: PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size);
802: PetscAssertPointer(ptr, 2);
803: PetscAssertPointer(stream, 3);
804: if (req_size) {
805: const auto size = static_cast<size_type>(req_size);
806: auto aligned_size = alignment == alignof(char) ? size : size + alignment;
807: void *vptr = nullptr;
809: PetscCall(allocate_(aligned_size, &ret_ptr, stream));
810: vptr = ret_ptr;
811: std::align(alignment, size, vptr, aligned_size);
812: ret_ptr = reinterpret_cast<value_type *>(vptr);
813: // sets memory to NaN or infinity depending on the type to catch out uninitialized memory
814: // accesses.
815: if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream));
816: }
817: *ptr = ret_ptr;
818: PetscFunctionReturn(PETSC_SUCCESS);
819: }
821: /*
822: SegmentedMemoryPool::deallocate - release a pointer back to the memory pool
824: Input Parameters:
825: + ptr - the pointer to release
826: - stream - the stream to release it on
828: Notes:
829: If ptr is not owned by the pool it is unchanged.
830: */
831: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
832: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept
833: {
834: PetscFunctionBegin;
835: PetscAssertPointer(ptr, 1);
836: PetscAssertPointer(stream, 2);
837: // nobody owns a nullptr, and if they do then they have bigger problems
838: if (!*ptr) PetscFunctionReturn(PETSC_SUCCESS);
839: for (auto &block : pool_) {
840: auto found = false;
842: PetscCall(block.try_deallocate_chunk(ptr, stream, &found));
843: if (PetscLikely(found)) break;
844: }
845: PetscFunctionReturn(PETSC_SUCCESS);
846: }
848: /*
849: SegmentedMemoryPool::reallocate - Resize an allocated buffer
851: Input Parameters:
852: + new_req_size - the new buffer size
853: . ptr - pointer to the buffer
854: - stream - stream to resize with
856: Output Parameter:
857: . ptr - pointer to the new region
859: Notes:
860: ptr must have been allocated by the pool.
862: It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated).
863: */
864: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
865: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept
866: {
867: using chunk_type = typename block_type::chunk_type;
869: const auto new_size = static_cast<size_type>(new_req_size);
870: const auto old_ptr = *ptr;
871: chunk_type *chunk = nullptr;
873: PetscFunctionBegin;
874: PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size);
875: PetscAssertPointer(ptr, 2);
876: PetscAssertPointer(stream, 3);
878: // if reallocating to zero, just free
879: if (PetscUnlikely(new_size == 0)) {
880: PetscCall(deallocate(ptr, stream));
881: PetscFunctionReturn(PETSC_SUCCESS);
882: }
884: // search the blocks for the owning chunk
885: for (auto &block : pool_) {
886: PetscCall(block.try_find_chunk(old_ptr, &chunk));
887: if (chunk) break; // found
888: }
889: PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr);
891: if (chunk->capacity() < new_size) {
892: // chunk does not have enough room, need to grab a fresh chunk and copy to it
893: *ptr = nullptr;
894: PetscCall(chunk->release(stream));
895: PetscCall(allocate(new_size, ptr, stream));
896: PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream));
897: } else {
898: // chunk had enough room we can simply grow (or shrink) to fit the new size
899: PetscCall(chunk->resize(new_size));
900: }
901: PetscFunctionReturn(PETSC_SUCCESS);
902: }
904: } // namespace memory
906: } // namespace Petsc