Actual source code: cupmevent.hpp
1: #pragma once
3: #include <petsc/private/cupminterface.hpp>
4: #include <petsc/private/cpp/memory.hpp>
5: #include <petsc/private/cpp/object_pool.hpp>
7: #include <stack>
9: namespace Petsc
10: {
12: namespace device
13: {
15: namespace cupm
16: {
18: // A pool for allocating cupmEvent_t's. While events are generally very cheap to create and
19: // destroy, they are not free. Using the pool vs on-demand creation and destruction yields a ~20%
20: // speedup.
21: template <DeviceType T, unsigned long flags>
22: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL CUPMEventPool : impl::Interface<T>, public RegisterFinalizeable<CUPMEventPool<T, flags>> {
23: public:
24: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
26: PetscErrorCode allocate(cupmEvent_t *) noexcept;
27: PetscErrorCode deallocate(cupmEvent_t *) noexcept;
29: PetscErrorCode finalize_() noexcept;
31: private:
32: std::stack<cupmEvent_t> pool_;
33: };
35: template <DeviceType T, unsigned long flags>
36: inline PetscErrorCode CUPMEventPool<T, flags>::finalize_() noexcept
37: {
38: PetscFunctionBegin;
39: while (!pool_.empty()) {
40: PetscCallCUPM(cupmEventDestroy(std::move(pool_.top())));
41: PetscCallCXX(pool_.pop());
42: }
43: PetscFunctionReturn(PETSC_SUCCESS);
44: }
46: template <DeviceType T, unsigned long flags>
47: inline PetscErrorCode CUPMEventPool<T, flags>::allocate(cupmEvent_t *event) noexcept
48: {
49: PetscFunctionBegin;
50: PetscAssertPointer(event, 1);
51: if (pool_.empty()) {
52: PetscCall(this->register_finalize());
53: PetscCallCUPM(cupmEventCreateWithFlags(event, (unsigned int)flags));
54: } else {
55: PetscCallCXX(*event = std::move(pool_.top()));
56: PetscCallCXX(pool_.pop());
57: }
58: PetscFunctionReturn(PETSC_SUCCESS);
59: }
61: template <DeviceType T, unsigned long flags>
62: inline PetscErrorCode CUPMEventPool<T, flags>::deallocate(cupmEvent_t *in_event) noexcept
63: {
64: PetscFunctionBegin;
65: PetscAssertPointer(in_event, 1);
66: if (auto event = std::exchange(*in_event, cupmEvent_t{})) {
67: if (this->registered()) {
68: PetscCallCXX(pool_.push(std::move(event)));
69: } else {
70: PetscCallCUPM(cupmEventDestroy(event));
71: }
72: }
73: PetscFunctionReturn(PETSC_SUCCESS);
74: }
76: template <DeviceType T, unsigned long flags>
77: CUPMEventPool<T, flags> &cupm_event_pool() noexcept
78: {
79: static CUPMEventPool<T, flags> pool;
80: return pool;
81: }
83: // pool of events with timing disabled
84: template <DeviceType T>
85: inline auto cupm_fast_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>()) &
86: {
87: return cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>();
88: }
90: // pool of events with timing enabled
91: template <DeviceType T>
92: inline auto cupm_timer_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>()) &
93: {
94: return cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>();
95: }
97: // A simple wrapper of cupmEvent_t. This is used in conjunction with CUPMStream to build the
98: // event-stream pairing for the async allocator. It is also used as the data member of
99: // PetscEvent.
100: template <DeviceType T>
101: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL CUPMEvent : impl::Interface<T>, public memory::PoolAllocated {
102: using pool_type = memory::PoolAllocated;
104: public:
105: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
107: constexpr CUPMEvent() noexcept = default;
108: ~CUPMEvent() noexcept;
110: CUPMEvent(CUPMEvent &&) noexcept;
111: CUPMEvent &operator=(CUPMEvent &&) noexcept;
113: // event is not copyable
114: CUPMEvent(const CUPMEvent &) = delete;
115: CUPMEvent &operator=(const CUPMEvent &) = delete;
117: PETSC_NODISCARD cupmEvent_t get() noexcept;
118: PetscErrorCode record(cupmStream_t) noexcept;
120: explicit operator bool() const noexcept;
122: private:
123: cupmEvent_t event_{};
124: };
126: template <DeviceType T>
127: inline CUPMEvent<T>::~CUPMEvent() noexcept
128: {
129: PetscFunctionBegin;
130: PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
131: PetscFunctionReturnVoid();
132: }
134: template <DeviceType T>
135: inline CUPMEvent<T>::CUPMEvent(CUPMEvent &&other) noexcept : pool_type(std::move(other)), event_(util::exchange(other.event_, cupmEvent_t{}))
136: {
137: static_assert(std::is_empty<impl::Interface<T>>::value, "");
138: }
140: template <DeviceType T>
141: inline CUPMEvent<T> &CUPMEvent<T>::operator=(CUPMEvent &&other) noexcept
142: {
143: PetscFunctionBegin;
144: if (this != &other) {
145: pool_type::operator=(std::move(other));
146: PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
147: event_ = util::exchange(other.event_, cupmEvent_t{});
148: }
149: PetscFunctionReturn(*this);
150: }
152: template <DeviceType T>
153: inline typename CUPMEvent<T>::cupmEvent_t CUPMEvent<T>::get() noexcept
154: {
155: PetscFunctionBegin;
156: if (PetscUnlikely(!event_)) PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().allocate(&event_));
157: PetscFunctionReturn(event_);
158: }
160: template <DeviceType T>
161: inline PetscErrorCode CUPMEvent<T>::record(cupmStream_t stream) noexcept
162: {
163: PetscFunctionBegin;
164: PetscCallCUPM(cupmEventRecord(get(), stream));
165: PetscFunctionReturn(PETSC_SUCCESS);
166: }
168: template <DeviceType T>
169: inline CUPMEvent<T>::operator bool() const noexcept
170: {
171: return event_ != cupmEvent_t{};
172: }
174: } // namespace cupm
176: } // namespace device
178: } // namespace Petsc