Agora  1.2.0
Agora project
concurrentqueue.h
Go to the documentation of this file.
1 // Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue.
2 // An overview, including benchmark results, is provided here:
3 // http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
4 // The full design is also described in excruciating detail at:
5 // http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
6 
7 // Simplified BSD license:
8 // Copyright (c) 2013-2020, Cameron Desrochers.
9 // All rights reserved.
10 //
11 // Redistribution and use in source and binary forms, with or without modification,
12 // are permitted provided that the following conditions are met:
13 //
14 // - Redistributions of source code must retain the above copyright notice, this list of
15 // conditions and the following disclaimer.
16 // - Redistributions in binary form must reproduce the above copyright notice, this list of
17 // conditions and the following disclaimer in the documentation and/or other materials
18 // provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
21 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 // THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
25 // OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
27 // TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
28 // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // Also dual-licensed under the Boost Software License (see LICENSE.md)
31 
32 #pragma once
33 
34 #if defined(__GNUC__)
35 // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
36 // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
37 // upon assigning any computed values)
38 #pragma GCC diagnostic push
39 #pragma GCC diagnostic ignored "-Wconversion"
40 
41 #ifdef MCDBGQ_USE_RELACY
42 #pragma GCC diagnostic ignored "-Wint-to-pointer-cast"
43 #endif
44 #endif
45 
46 #if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
47 // VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher
48 // does not support `if constexpr`, so we have no choice but to simply disable the warning
49 #pragma warning(push)
50 #pragma warning(disable: 4127) // conditional expression is constant
51 #endif
52 
53 #if defined(__APPLE__)
54 #include "TargetConditionals.h"
55 #endif
56 
57 #ifdef MCDBGQ_USE_RELACY
58 #include "relacy/relacy_std.hpp"
59 #include "relacy_shims.h"
60 // We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations.
61 // We'll override the default trait malloc ourselves without a macro.
62 #undef new
63 #undef delete
64 #undef malloc
65 #undef free
66 #else
67 #include <atomic> // Requires C++11. Sorry VS2010.
68 #include <cassert>
69 #endif
70 #include <cstddef> // for max_align_t
71 #include <cstdint>
72 #include <cstdlib>
73 #include <type_traits>
74 #include <algorithm>
75 #include <utility>
76 #include <limits>
77 #include <climits> // for CHAR_BIT
78 #include <array>
79 #include <thread> // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
80 
81 // Platform-specific definitions of a numeric thread ID type and an invalid value
namespace moodycamel { namespace details {
	// Default converter from a platform thread ID to a hashable numeric value.
	// Platforms whose thread_id_t is not directly hashable (e.g. std::thread::id)
	// specialize this template further below.
	template<typename thread_id_t> struct thread_id_converter {
		typedef thread_id_t thread_id_numeric_size_t;
		typedef thread_id_t thread_id_hash_t;
		static thread_id_hash_t prehash(thread_id_t const& x) { return x; }
	};
} }
89 #if defined(MCDBGQ_USE_RELACY)
namespace moodycamel { namespace details {
	// Under the Relacy race detector, thread IDs are simply the simulator's
	// small integer thread indices.
	typedef std::uint32_t thread_id_t;
	static const thread_id_t invalid_thread_id  = 0xFFFFFFFFU;
	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;
	static inline thread_id_t thread_id() { return rl::thread_index(); }
} }
96 #elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)
// No sense pulling in windows.h in a header, we'll manually declare the function
// we use and rely on backwards-compatibility for this not to break
extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
namespace moodycamel { namespace details {
	static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows");
	typedef std::uint32_t thread_id_t;
	static const thread_id_t invalid_thread_id  = 0;			// See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx
	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU;	// Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
	// GetCurrentThreadId returns the OS-assigned ID of the calling thread.
	static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
} }
107 #elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE)
108 namespace moodycamel { namespace details {
109  static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");
110 
112  static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID
113 
114  // Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's
115  // only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't
116  // be.
117  static inline thread_id_t thread_id() { return std::this_thread::get_id(); }
118 
119  template<std::size_t> struct thread_id_size { };
120  template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; };
121  template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; };
122 
123  template<> struct thread_id_converter<thread_id_t> {
124  typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_size_t;
125 #ifndef __APPLE__
126  typedef std::size_t thread_id_hash_t;
127 #else
129 #endif
130 
131  static thread_id_hash_t prehash(thread_id_t const& x)
132  {
133 #ifndef __APPLE__
134  return std::hash<std::thread::id>()(x);
135 #else
136  return *reinterpret_cast<thread_id_hash_t const*>(&x);
137 #endif
138  }
139  };
140 } }
141 #else
142 // Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
143 // In order to get a numeric thread ID in a platform-independent way, we use a thread-local
144 // static variable's address as a thread identifier :-)
145 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
146 #define MOODYCAMEL_THREADLOCAL __thread
147 #elif defined(_MSC_VER)
148 #define MOODYCAMEL_THREADLOCAL __declspec(thread)
149 #else
150 // Assume C++11 compliant compiler
151 #define MOODYCAMEL_THREADLOCAL thread_local
152 #endif
153 namespace moodycamel { namespace details {
155  static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr
156  static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
157  inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
158 } }
159 #endif
160 
161 // Constexpr if
162 #ifndef MOODYCAMEL_CONSTEXPR_IF
163 #if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L
164 #define MOODYCAMEL_CONSTEXPR_IF if constexpr
165 #define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
166 #else
167 #define MOODYCAMEL_CONSTEXPR_IF if
168 #define MOODYCAMEL_MAYBE_UNUSED
169 #endif
170 #endif
171 
172 // Exceptions
173 #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
174 #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
175 #define MOODYCAMEL_EXCEPTIONS_ENABLED
176 #endif
177 #endif
178 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
179 #define MOODYCAMEL_TRY try
180 #define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)
181 #define MOODYCAMEL_RETHROW throw
182 #define MOODYCAMEL_THROW(expr) throw (expr)
183 #else
184 #define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true)
185 #define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false)
186 #define MOODYCAMEL_RETHROW
187 #define MOODYCAMEL_THROW(expr)
188 #endif
189 
190 #ifndef MOODYCAMEL_NOEXCEPT
191 #if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
192 #define MOODYCAMEL_NOEXCEPT
193 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
194 #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
195 #elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800
196 // VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-(
197 // We have to assume *all* non-trivial constructors may throw on VS2012!
198 #define MOODYCAMEL_NOEXCEPT _NOEXCEPT
199 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value)
200 #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
201 #elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900
202 #define MOODYCAMEL_NOEXCEPT _NOEXCEPT
203 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value)
204 #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
205 #else
206 #define MOODYCAMEL_NOEXCEPT noexcept
207 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)
208 #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)
209 #endif
210 #endif
211 
212 #ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
213 #ifdef MCDBGQ_USE_RELACY
214 #define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
215 #else
216 // VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
217 // g++ <=4.7 doesn't support thread_local either.
218 // Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work
219 #if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
220 // Assume `thread_local` is fully supported in all other C++11 compilers/platforms
221 //#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since several users report having problems with it on
222 #endif
223 #endif
224 #endif
225 
226 // VS2012 doesn't support deleted functions.
227 // In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.
228 #ifndef MOODYCAMEL_DELETE_FUNCTION
229 #if defined(_MSC_VER) && _MSC_VER < 1800
230 #define MOODYCAMEL_DELETE_FUNCTION
231 #else
232 #define MOODYCAMEL_DELETE_FUNCTION = delete
233 #endif
234 #endif
235 
namespace moodycamel { namespace details {
#ifndef MOODYCAMEL_ALIGNAS
// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
#if defined(_MSC_VER) && _MSC_VER <= 1800
#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
	// Maps a compile-time alignment value onto a correspondingly-aligned
	// typedef of T; only the power-of-two alignments enumerated below are
	// supported (the unspecialized case has no `type`, giving a compile error).
	template<int Align, typename T> struct Vs2013Aligned { }; // default, unsupported alignment
	template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; };
	template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; };
	template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; };
	template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; };
	template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; };
	template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; };
	template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; };
	template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; };
	template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; };
#else
	// Identity metafunction; lets MOODYCAMEL_ALIGNED_TYPE_LIKE attach alignas
	// to a dependent type name.
	template<typename T> struct identity { typedef T type; };
#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
#endif
#endif
} }
261 
262 
263 // TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one,
264 // we can apply per-function compile-time suppression.
265 // See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
266 #define MOODYCAMEL_NO_TSAN
267 #if defined(__has_feature)
268  #if __has_feature(thread_sanitizer)
269  #undef MOODYCAMEL_NO_TSAN
270  #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
271  #endif // TSAN
272 #endif // TSAN
273 
274 // Compiler-specific likely/unlikely hints
namespace moodycamel { namespace details {
#if defined(__GNUC__)
	// Branch-prediction hints. The function names are parenthesized at the
	// definition so that macros named `likely`/`unlikely` can't break them.
	static inline bool (likely)(bool x) { return __builtin_expect((x), true); }
	static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); }
#else
	// No-op fallbacks for compilers without __builtin_expect.
	static inline bool (likely)(bool x) { return x; }
	static inline bool (unlikely)(bool x) { return x; }
#endif
} }
284 
285 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
286 #include "internal/concurrentqueue_internal_debug.h"
287 #endif
288 
289 namespace moodycamel {
290 namespace details {
291  template<typename T>
293  static_assert(std::is_integral<T>::value, "const_numeric_max can only be used with integers");
295  ? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1)
296  : static_cast<T>(-1);
297  };
298 
299 #if defined(__GLIBCXX__)
300  typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while
301 #else
302  typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std::
303 #endif
304 
305  // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting
306  // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64.
307  typedef union {
309  long long y;
310  void* z;
311  } max_align_t;
312 }
313 
314 // Default traits for the ConcurrentQueue. To change some of the
315 // traits without re-implementing all of them, inherit from this
316 // struct and shadow the declarations you wish to be different;
317 // since the traits are used as a template type parameter, the
318 // shadowed declarations will be used where defined, and the defaults
319 // otherwise.
321 {
322  // General-purpose size type. std::size_t is strongly recommended.
323  typedef std::size_t size_t;
324 
325  // The type used for the enqueue and dequeue indices. Must be at least as
326  // large as size_t. Should be significantly larger than the number of elements
327  // you expect to hold at once, especially if you have a high turnover rate;
328  // for example, on 32-bit x86, if you expect to have over a hundred million
329  // elements or pump several million elements through your queue in a very
330  // short space of time, using a 32-bit type *may* trigger a race condition.
331  // A 64-bit int type is recommended in that case, and in practice will
332  // prevent a race condition no matter the usage of the queue. Note that
333  // whether the queue is lock-free with a 64-int type depends on the whether
334  // std::atomic<std::uint64_t> is lock-free, which is platform-specific.
335  typedef std::size_t index_t;
336 
337  // Internally, all elements are enqueued and dequeued from multi-element
338  // blocks; this is the smallest controllable unit. If you expect few elements
339  // but many producers, a smaller block size should be favoured. For few producers
340  // and/or many elements, a larger block size is preferred. A sane default
341  // is provided. Must be a power of 2.
342  static const size_t BLOCK_SIZE = 32;
343 
344  // For explicit producers (i.e. when using a producer token), the block is
345  // checked for being empty by iterating through a list of flags, one per element.
346  // For large block sizes, this is too inefficient, and switching to an atomic
347  // counter-based approach is faster. The switch is made for block sizes strictly
348  // larger than this threshold.
349  static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;
350 
351  // How many full blocks can be expected for a single explicit producer? This should
352  // reflect that number's maximum for optimal performance. Must be a power of 2.
353  static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;
354 
355  // How many full blocks can be expected for a single implicit producer? This should
356  // reflect that number's maximum for optimal performance. Must be a power of 2.
357  static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;
358 
359  // The initial size of the hash table mapping thread IDs to implicit producers.
360  // Note that the hash is resized every time it becomes half full.
361  // Must be a power of two, and either 0 or at least 1. If 0, implicit production
362  // (using the enqueue methods without an explicit producer token) is disabled.
363  static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
364 
365  // Controls the number of items that an explicit consumer (i.e. one with a token)
366  // must consume before it causes all consumers to rotate and move on to the next
367  // internal queue.
368  static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;
369 
370  // The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
371  // Enqueue operations that would cause this limit to be surpassed will fail. Note
372  // that this limit is enforced at the block level (for performance reasons), i.e.
373  // it's rounded up to the nearest block size.
375 
376  // The number of times to spin before sleeping when waiting on a semaphore.
377  // Recommended values are on the order of 1000-10000 unless the number of
378  // consumer threads exceeds the number of idle cores (in which case try 0-100).
379  // Only affects instances of the BlockingConcurrentQueue.
380  static const int MAX_SEMA_SPINS = 10000;
381 
382 
383 #ifndef MCDBGQ_USE_RELACY
384  // Memory allocation can be customized if needed.
385  // malloc should return nullptr on failure, and handle alignment like std::malloc.
386 #if defined(malloc) || defined(free)
387  // Gah, this is 2015, stop defining macros that break standard code already!
388  // Work around malloc/free being special macros:
389  static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); }
390  static inline void WORKAROUND_free(void* ptr) { return free(ptr); }
391  static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); }
392  static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); }
393 #else
394  static inline void* malloc(size_t size) { return std::malloc(size); }
395  static inline void free(void* ptr) { return std::free(ptr); }
396 #endif
397 #else
398  // Debug versions when running under the Relacy race detector (ignore
399  // these in user code)
400  static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); }
401  static inline void free(void* ptr) { return rl::rl_free(ptr, $); }
402 #endif
403 };
404 
405 
406 // When producing or consuming many elements, the most efficient way is to:
407 // 1) Use one of the bulk-operation methods of the queue with a token
408 // 2) Failing that, use the bulk-operation methods without a token
409 // 3) Failing that, create a token and use that with the single-item methods
410 // 4) Failing that, use the single-parameter methods of the queue
411 // Having said that, don't create tokens willy-nilly -- ideally there should be
412 // a maximum of one token per thread (of each kind).
413 struct ProducerToken;
414 struct ConsumerToken;
415 
416 template<typename T, typename Traits> class ConcurrentQueue;
417 template<typename T, typename Traits> class BlockingConcurrentQueue;
418 class ConcurrentQueueTests;
419 
420 
421 namespace details
422 {
424  {
426  std::atomic<bool> inactive;
428 
430  : next(nullptr), inactive(false), token(nullptr)
431  {
432  }
433  };
434 
435  template<bool use32> struct _hash_32_or_64 {
436  static inline std::uint32_t hash(std::uint32_t h)
437  {
438  // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
439  // Since the thread ID is already unique, all we really want to do is propagate that
440  // uniqueness evenly across all the bits, so that we can use a subset of the bits while
441  // reducing collisions significantly
442  h ^= h >> 16;
443  h *= 0x85ebca6b;
444  h ^= h >> 13;
445  h *= 0xc2b2ae35;
446  return h ^ (h >> 16);
447  }
448  };
449  template<> struct _hash_32_or_64<1> {
450  static inline std::uint64_t hash(std::uint64_t h)
451  {
452  h ^= h >> 33;
453  h *= 0xff51afd7ed558ccd;
454  h ^= h >> 33;
455  h *= 0xc4ceb9fe1a85ec53;
456  return h ^ (h >> 33);
457  }
458  };
459  template<std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { };
460 
461  static inline size_t hash_thread_id(thread_id_t id)
462  {
463  static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values");
464  return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(
466  }
467 
468  template<typename T>
469  static inline bool circular_less_than(T a, T b)
470  {
471 #ifdef _MSC_VER
472 #pragma warning(push)
473 #pragma warning(disable: 4554)
474 #endif
475  static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "circular_less_than is intended to be used only with unsigned integer types");
476  return static_cast<T>(a - b) > static_cast<T>(static_cast<T>(1) << static_cast<T>(sizeof(T) * CHAR_BIT - 1));
477 #ifdef _MSC_VER
478 #pragma warning(pop)
479 #endif
480  }
481 
482  template<typename U>
483  static inline char* align_for(char* ptr)
484  {
485  const std::size_t alignment = std::alignment_of<U>::value;
486  return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
487  }
488 
489  template<typename T>
490  static inline T ceil_to_pow_2(T x)
491  {
492  static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types");
493 
494  // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
495  --x;
496  x |= x >> 1;
497  x |= x >> 2;
498  x |= x >> 4;
499  for (std::size_t i = 1; i < sizeof(T); i <<= 1) {
500  x |= x >> (i << 3);
501  }
502  ++x;
503  return x;
504  }
505 
506  template<typename T>
507  static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right)
508  {
509  T temp = std::move(left.load(std::memory_order_relaxed));
510  left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed);
511  right.store(std::move(temp), std::memory_order_relaxed);
512  }
513 
	// Identity function that yields its argument as a const lvalue reference,
	// preventing the caller from binding it to an rvalue overload (i.e. it
	// forces copy semantics where a move might otherwise be selected).
	template<typename T>
	static inline T const& nomove(T const& x)
	{
		return x;
	}
519 
	// Compile-time switch: when Enable is true, eval() pins its argument as a
	// const lvalue reference (forcing copies); when false, eval() perfectly
	// forwards the argument (allowing moves).
	template<bool Enable>
	struct nomove_if
	{
		template<typename T>
		static inline T const& eval(T const& x)
		{
			return x;
		}
	};
	
	template<>
	struct nomove_if<false>
	{
		// Perfect-forwarding passthrough: rvalues stay rvalues, lvalues stay lvalues.
		template<typename U>
		static inline auto eval(U&& x)
			-> decltype(std::forward<U>(x))
		{
			return std::forward<U>(x);
		}
	};
540 
	// Dereferences an iterator inside a MOODYCAMEL_NOEXCEPT wrapper, so that
	// noexcept-expressions involving *it can be written uniformly even for
	// iterator types whose operator* has no exception specification.
	template<typename It>
	static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it)
	{
		return *it;
	}
546 
	// std::is_trivially_destructible is unavailable on g++ < 4.8; fall back to
	// the pre-standard std::has_trivial_destructor trait there.
#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
	template<typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> { };
#else
	template<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { };
#endif
552 
553 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
554 #ifdef MCDBGQ_USE_RELACY
555  typedef RelacyThreadExitListener ThreadExitListener;
556  typedef RelacyThreadExitNotifier ThreadExitNotifier;
557 #else
	// One registered thread-exit callback; nodes form a singly-linked list
	// managed by ThreadExitNotifier below.
	struct ThreadExitListener
	{
		typedef void (*callback_t)(void*);
		callback_t callback;	// invoked with userData when the subscribing thread exits
		void* userData;
		
		ThreadExitListener* next;		// reserved for use by the ThreadExitNotifier
	};
566 
567 
	// Maintains a per-thread list of callbacks that are fired when the thread
	// exits; relies on the destructor of a thread_local instance running at
	// thread exit. subscribe/unsubscribe act on the *calling* thread's list
	// only, so no cross-thread synchronization is needed.
	class ThreadExitNotifier
	{
	public:
		// Registers `listener` to be invoked when the calling thread exits
		// (prepends it to this thread's list).
		static void subscribe(ThreadExitListener* listener)
		{
			auto& tlsInst = instance();
			listener->next = tlsInst.tail;
			tlsInst.tail = listener;
		}
		
		// Removes `listener` from the calling thread's list, if present.
		static void unsubscribe(ThreadExitListener* listener)
		{
			auto& tlsInst = instance();
			ThreadExitListener** prev = &tlsInst.tail;
			for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) {
				if (ptr == listener) {
					*prev = ptr->next;
					break;
				}
				prev = &ptr->next;
			}
		}
		
	private:
		ThreadExitNotifier() : tail(nullptr) { }
		ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
		ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
		
		// Runs at thread exit (destructor of the thread_local in instance()).
		~ThreadExitNotifier()
		{
			// This thread is about to exit, let everyone know!
			assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.");
			for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) {
				ptr->callback(ptr->userData);
			}
		}
		
		// Thread-local
		static inline ThreadExitNotifier& instance()
		{
			static thread_local ThreadExitNotifier notifier;
			return notifier;
		}
		
	private:
		ThreadExitListener* tail;	// head of this thread's listener list (newest first)
	};
615 #endif
616 #endif
617 
	// Compile-time lock-freedom query: `value` mirrors the corresponding
	// ATOMIC_*_LOCK_FREE macro (0 = never lock-free, 1 = sometimes, 2 = always).
	// Unlisted types yield 0; integral types are routed through make_signed so
	// signed and unsigned variants share one specialization.
	template<typename T> struct static_is_lock_free_num { enum { value = 0 }; };
	template<> struct static_is_lock_free_num<signed char> { enum { value = ATOMIC_CHAR_LOCK_FREE }; };
	template<> struct static_is_lock_free_num<short> { enum { value = ATOMIC_SHORT_LOCK_FREE }; };
	template<> struct static_is_lock_free_num<int> { enum { value = ATOMIC_INT_LOCK_FREE }; };
	template<> struct static_is_lock_free_num<long> { enum { value = ATOMIC_LONG_LOCK_FREE }; };
	template<> struct static_is_lock_free_num<long long> { enum { value = ATOMIC_LLONG_LOCK_FREE }; };
	template<typename T> struct static_is_lock_free : static_is_lock_free_num<typename std::make_signed<T>::type> { };
	template<> struct static_is_lock_free<bool> { enum { value = ATOMIC_BOOL_LOCK_FREE }; };
	template<typename U> struct static_is_lock_free<U*> { enum { value = ATOMIC_POINTER_LOCK_FREE }; };
627 }
628 
629 
631 {
632  template<typename T, typename Traits>
633  explicit ProducerToken(ConcurrentQueue<T, Traits>& queue);
634 
635  template<typename T, typename Traits>
637 
639  : producer(other.producer)
640  {
641  other.producer = nullptr;
642  if (producer != nullptr) {
643  producer->token = this;
644  }
645  }
646 
648  {
649  swap(other);
650  return *this;
651  }
652 
654  {
655  std::swap(producer, other.producer);
656  if (producer != nullptr) {
657  producer->token = this;
658  }
659  if (other.producer != nullptr) {
660  other.producer->token = &other;
661  }
662  }
663 
664  // A token is always valid unless:
665  // 1) Memory allocation failed during construction
666  // 2) It was moved via the move constructor
667  // (Note: assignment does a swap, leaving both potentially valid)
668  // 3) The associated queue was destroyed
669  // Note that if valid() returns true, that only indicates
670  // that the token is valid for use with a specific queue,
671  // but not which one; that's up to the user to track.
672  inline bool valid() const { return producer != nullptr; }
673 
675  {
676  if (producer != nullptr) {
677  producer->token = nullptr;
678  producer->inactive.store(true, std::memory_order_release);
679  }
680  }
681 
682  // Disable copying and assignment
685 
686 private:
687  template<typename T, typename Traits> friend class ConcurrentQueue;
688  friend class ConcurrentQueueTests;
689 
690 protected:
692 };
693 
694 
696 {
697  template<typename T, typename Traits>
699 
700  template<typename T, typename Traits>
702 
704  : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer)
705  {
706  }
707 
709  {
710  swap(other);
711  return *this;
712  }
713 
715  {
716  std::swap(initialOffset, other.initialOffset);
717  std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);
718  std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);
719  std::swap(currentProducer, other.currentProducer);
720  std::swap(desiredProducer, other.desiredProducer);
721  }
722 
723  // Disable copying and assignment
726 
727 private:
728  template<typename T, typename Traits> friend class ConcurrentQueue;
729  friend class ConcurrentQueueTests;
730 
731 private: // but shared with ConcurrentQueue
732  std::uint32_t initialOffset;
733  std::uint32_t lastKnownGlobalOffset;
737 };
738 
739 // Need to forward-declare this swap because it's in a namespace.
740 // See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces
741 template<typename T, typename Traits>
743 
744 
745 template<typename T, typename Traits = ConcurrentQueueDefaultTraits>
746 class ConcurrentQueue
747 {
748 public:
749  typedef ::moodycamel::ProducerToken producer_token_t;
750  typedef ::moodycamel::ConsumerToken consumer_token_t;
751 
752  typedef typename Traits::index_t index_t;
753  typedef typename Traits::size_t size_t;
754 
755  static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
756  static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
757  static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
758  static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);
759  static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);
760  static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast<std::uint32_t>(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);
761 #ifdef _MSC_VER
762 #pragma warning(push)
763 #pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!)
764 #pragma warning(disable: 4309) // static_cast: Truncation of constant value
765 #endif
766  static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max<size_t>::value - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max<size_t>::value : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE);
767 #ifdef _MSC_VER
768 #pragma warning(pop)
769 #endif
770 
771  static_assert(!std::numeric_limits<size_t>::is_signed && std::is_integral<size_t>::value, "Traits::size_t must be an unsigned integral type");
772  static_assert(!std::numeric_limits<index_t>::is_signed && std::is_integral<index_t>::value, "Traits::index_t must be an unsigned integral type");
773  static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t");
774  static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");
775  static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)");
776  static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
777  static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
778  static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2");
779  static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)");
780 
781 public:
782  // Creates a queue with at least `capacity` element slots; note that the
783  // actual number of elements that can be inserted without additional memory
784  // allocation depends on the number of producers and the block size (e.g. if
785  // the block size is equal to `capacity`, only a single block will be allocated
786  // up-front, which means only a single producer will be able to enqueue elements
787  // without an extra allocation -- blocks aren't shared between producers).
788  // This method is not thread safe -- it is up to the user to ensure that the
789  // queue is fully constructed before it starts being used by other threads (this
790  // includes making the memory effects of construction visible, possibly with a
791  // memory barrier).
792  explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE)
793  : producerListTail(nullptr),
794  producerCount(0),
798  {
799  implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
801  populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
802 
803 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
804  // Track all the producers using a fully-resolved typed list for
805  // each kind; this makes it possible to debug them starting from
806  // the root queue object (otherwise wacky casts are needed that
807  // don't compile in the debugger's expression evaluator).
808  explicitProducers.store(nullptr, std::memory_order_relaxed);
809  implicitProducers.store(nullptr, std::memory_order_relaxed);
810 #endif
811  }
812 
813  // Computes the correct amount of pre-allocated blocks for you based
814  // on the minimum number of elements you want available at any given
815  // time, and the maximum concurrent number of each type of producer.
816  ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
817  : producerListTail(nullptr),
818  producerCount(0),
822  {
823  implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
825  size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers);
827 
828 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
829  explicitProducers.store(nullptr, std::memory_order_relaxed);
830  implicitProducers.store(nullptr, std::memory_order_relaxed);
831 #endif
832  }
833 
834  // Note: The queue should not be accessed concurrently while it's
835  // being deleted. It's up to the user to synchronize this.
836  // This method is not thread safe.
838  {
839  // Destroy producers
840  auto ptr = producerListTail.load(std::memory_order_relaxed);
841  while (ptr != nullptr) {
842  auto next = ptr->next_prod();
843  if (ptr->token != nullptr) {
844  ptr->token->producer = nullptr;
845  }
846  destroy(ptr);
847  ptr = next;
848  }
849 
850  // Destroy implicit producer hash tables
852  auto hash = implicitProducerHash.load(std::memory_order_relaxed);
853  while (hash != nullptr) {
854  auto prev = hash->prev;
855  if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically
856  for (size_t i = 0; i != hash->capacity; ++i) {
857  hash->entries[i].~ImplicitProducerKVP();
858  }
859  hash->~ImplicitProducerHash();
860  (Traits::free)(hash);
861  }
862  hash = prev;
863  }
864  }
865 
866  // Destroy global free list
867  auto block = freeList.head_unsafe();
868  while (block != nullptr) {
869  auto next = block->freeListNext.load(std::memory_order_relaxed);
870  if (block->dynamicallyAllocated) {
871  destroy(block);
872  }
873  block = next;
874  }
875 
876  // Destroy initial free list
878  }
879 
880  // Disable copying and copy assignment
883 
884  // Moving is supported, but note that it is *not* a thread-safe operation.
885  // Nobody can use the queue while it's being moved, and the memory effects
886  // of that move must be propagated to other threads before they can use it.
887  // Note: When a queue is moved, its tokens are still valid but can only be
888  // used with the destination queue (i.e. semantically they are moved along
889  // with the queue itself).
891  : producerListTail(other.producerListTail.load(std::memory_order_relaxed)),
892  producerCount(other.producerCount.load(std::memory_order_relaxed)),
893  initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)),
894  initialBlockPool(other.initialBlockPool),
895  initialBlockPoolSize(other.initialBlockPoolSize),
896  freeList(std::move(other.freeList)),
897  nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)),
898  globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed))
899  {
900  // Move the other one into this, and leave the other one as an empty queue
901  implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
904 
905  other.producerListTail.store(nullptr, std::memory_order_relaxed);
906  other.producerCount.store(0, std::memory_order_relaxed);
907  other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
908  other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);
909 
910 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
911  explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
912  other.explicitProducers.store(nullptr, std::memory_order_relaxed);
913  implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
914  other.implicitProducers.store(nullptr, std::memory_order_relaxed);
915 #endif
916 
917  other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
918  other.initialBlockPoolSize = 0;
919  other.initialBlockPool = nullptr;
920 
921  reown_producers();
922  }
923 
925  {
926  return swap_internal(other);
927  }
928 
929  // Swaps this queue's state with the other's. Not thread-safe.
930  // Swapping two queues does not invalidate their tokens, however
931  // the tokens that were created for one queue must be used with
932  // only the swapped queue (i.e. the tokens are tied to the
933  // queue's movable state, not the object itself).
935  {
936  swap_internal(other);
937  }
938 
939 private:
941  {
942  if (this == &other) {
943  return *this;
944  }
945 
951  freeList.swap(other.freeList);
954 
956 
957  reown_producers();
958  other.reown_producers();
959 
960 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
961  details::swap_relaxed(explicitProducers, other.explicitProducers);
962  details::swap_relaxed(implicitProducers, other.implicitProducers);
963 #endif
964 
965  return *this;
966  }
967 
968 public:
969  // Enqueues a single item (by copying it).
970  // Allocates memory if required. Only fails if memory allocation fails (or implicit
971  // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
972  // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
973  // Thread-safe.
974  inline bool enqueue(T const& item)
975  {
977  else return inner_enqueue<CanAlloc>(item);
978  }
979 
980  // Enqueues a single item (by moving it, if possible).
981  // Allocates memory if required. Only fails if memory allocation fails (or implicit
982  // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
983  // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
984  // Thread-safe.
985  inline bool enqueue(T&& item)
986  {
988  else return inner_enqueue<CanAlloc>(std::move(item));
989  }
990 
991  // Enqueues a single item (by copying it) using an explicit producer token.
992  // Allocates memory if required. Only fails if memory allocation fails (or
993  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
994  // Thread-safe.
995  inline bool enqueue(producer_token_t const& token, T const& item)
996  {
997  return inner_enqueue<CanAlloc>(token, item);
998  }
999 
1000  // Enqueues a single item (by moving it, if possible) using an explicit producer token.
1001  // Allocates memory if required. Only fails if memory allocation fails (or
1002  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1003  // Thread-safe.
1004  inline bool enqueue(producer_token_t const& token, T&& item)
1005  {
1006  return inner_enqueue<CanAlloc>(token, std::move(item));
1007  }
1008 
1009  // Enqueues several items.
1010  // Allocates memory if required. Only fails if memory allocation fails (or
1011  // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
1012  // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1013  // Note: Use std::make_move_iterator if the elements should be moved instead of copied.
1014  // Thread-safe.
1015  template<typename It>
1016  bool enqueue_bulk(It itemFirst, size_t count)
1017  {
1019  else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
1020  }
1021 
1022  // Enqueues several items using an explicit producer token.
1023  // Allocates memory if required. Only fails if memory allocation fails
1024  // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1025  // Note: Use std::make_move_iterator if the elements should be moved
1026  // instead of copied.
1027  // Thread-safe.
1028  template<typename It>
1029  bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
1030  {
1031  return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
1032  }
1033 
1034  // Enqueues a single item (by copying it).
1035  // Does not allocate memory. Fails if not enough room to enqueue (or implicit
1036  // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
1037  // is 0).
1038  // Thread-safe.
1039  inline bool try_enqueue(T const& item)
1040  {
1042  else return inner_enqueue<CannotAlloc>(item);
1043  }
1044 
1045  // Enqueues a single item (by moving it, if possible).
1046  // Does not allocate memory (except for one-time implicit producer).
1047  // Fails if not enough room to enqueue (or implicit production is
1048  // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
1049  // Thread-safe.
1050  inline bool try_enqueue(T&& item)
1051  {
1053  else return inner_enqueue<CannotAlloc>(std::move(item));
1054  }
1055 
1056  // Enqueues a single item (by copying it) using an explicit producer token.
1057  // Does not allocate memory. Fails if not enough room to enqueue.
1058  // Thread-safe.
1059  inline bool try_enqueue(producer_token_t const& token, T const& item)
1060  {
1061  return inner_enqueue<CannotAlloc>(token, item);
1062  }
1063 
1064  // Enqueues a single item (by moving it, if possible) using an explicit producer token.
1065  // Does not allocate memory. Fails if not enough room to enqueue.
1066  // Thread-safe.
1067  inline bool try_enqueue(producer_token_t const& token, T&& item)
1068  {
1069  return inner_enqueue<CannotAlloc>(token, std::move(item));
1070  }
1071 
1072  // Enqueues several items.
1073  // Does not allocate memory (except for one-time implicit producer).
1074  // Fails if not enough room to enqueue (or implicit production is
1075  // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
1076  // Note: Use std::make_move_iterator if the elements should be moved
1077  // instead of copied.
1078  // Thread-safe.
1079  template<typename It>
1080  bool try_enqueue_bulk(It itemFirst, size_t count)
1081  {
1083  else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
1084  }
1085 
1086  // Enqueues several items using an explicit producer token.
1087  // Does not allocate memory. Fails if not enough room to enqueue.
1088  // Note: Use std::make_move_iterator if the elements should be moved
1089  // instead of copied.
1090  // Thread-safe.
1091  template<typename It>
1092  bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
1093  {
1094  return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
1095  }
1096 
1097 
1098 
1099  // Attempts to dequeue from the queue.
1100  // Returns false if all producer streams appeared empty at the time they
1101  // were checked (so, the queue is likely but not guaranteed to be empty).
1102  // Never allocates. Thread-safe.
1103  template<typename U>
1104  bool try_dequeue(U& item)
1105  {
1106  // Instead of simply trying each producer in turn (which could cause needless contention on the first
1107  // producer), we score them heuristically.
1108  size_t nonEmptyCount = 0;
1109  ProducerBase* best = nullptr;
1110  size_t bestSize = 0;
1111  for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {
1112  auto size = ptr->size_approx();
1113  if (size > 0) {
1114  if (size > bestSize) {
1115  bestSize = size;
1116  best = ptr;
1117  }
1118  ++nonEmptyCount;
1119  }
1120  }
1121 
1122  // If there was at least one non-empty queue but it appears empty at the time
1123  // we try to dequeue from it, we need to make sure every queue's been tried
1124  if (nonEmptyCount > 0) {
1125  if ((details::likely)(best->dequeue(item))) {
1126  return true;
1127  }
1128  for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
1129  if (ptr != best && ptr->dequeue(item)) {
1130  return true;
1131  }
1132  }
1133  }
1134  return false;
1135  }
1136 
1137  // Attempts to dequeue from the queue.
1138  // Returns false if all producer streams appeared empty at the time they
1139  // were checked (so, the queue is likely but not guaranteed to be empty).
1140  // This differs from the try_dequeue(item) method in that this one does
1141  // not attempt to reduce contention by interleaving the order that producer
1142  // streams are dequeued from. So, using this method can reduce overall throughput
1143  // under contention, but will give more predictable results in single-threaded
1144  // consumer scenarios. This is mostly only useful for internal unit tests.
1145  // Never allocates. Thread-safe.
1146  template<typename U>
1148  {
1149  for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
1150  if (ptr->dequeue(item)) {
1151  return true;
1152  }
1153  }
1154  return false;
1155  }
1156 
1157  // Attempts to dequeue from the queue using an explicit consumer token.
1158  // Returns false if all producer streams appeared empty at the time they
1159  // were checked (so, the queue is likely but not guaranteed to be empty).
1160  // Never allocates. Thread-safe.
1161  template<typename U>
1162  bool try_dequeue(consumer_token_t& token, U& item)
1163  {
1164  // The idea is roughly as follows:
1165  // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less
1166  // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place
1167  // If there's no items where you're supposed to be, keep moving until you find a producer with some items
1168  // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it
1169 
1170  if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
1172  return false;
1173  }
1174  }
1175 
1176  // If there was at least one non-empty queue but it appears empty at the time
1177  // we try to dequeue from it, we need to make sure every queue's been tried
1178  if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) {
1180  globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
1181  }
1182  return true;
1183  }
1184 
1185  auto tail = producerListTail.load(std::memory_order_acquire);
1186  auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
1187  if (ptr == nullptr) {
1188  ptr = tail;
1189  }
1190  while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
1191  if (ptr->dequeue(item)) {
1192  token.currentProducer = ptr;
1193  token.itemsConsumedFromCurrent = 1;
1194  return true;
1195  }
1196  ptr = ptr->next_prod();
1197  if (ptr == nullptr) {
1198  ptr = tail;
1199  }
1200  }
1201  return false;
1202  }
1203 
1204  // Attempts to dequeue several elements from the queue.
1205  // Returns the number of items actually dequeued.
1206  // Returns 0 if all producer streams appeared empty at the time they
1207  // were checked (so, the queue is likely but not guaranteed to be empty).
1208  // Never allocates. Thread-safe.
1209  template<typename It>
1210  size_t try_dequeue_bulk(It itemFirst, size_t max)
1211  {
1212  size_t count = 0;
1213  for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
1214  count += ptr->dequeue_bulk(itemFirst, max - count);
1215  if (count == max) {
1216  break;
1217  }
1218  }
1219  return count;
1220  }
1221 
1222  // Attempts to dequeue several elements from the queue using an explicit consumer token.
1223  // Returns the number of items actually dequeued.
1224  // Returns 0 if all producer streams appeared empty at the time they
1225  // were checked (so, the queue is likely but not guaranteed to be empty).
1226  // Never allocates. Thread-safe.
1227  template<typename It>
1228  size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
1229  {
1230  if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
1232  return 0;
1233  }
1234  }
1235 
1236  size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
1237  if (count == max) {
1238  if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
1239  globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
1240  }
1241  return max;
1242  }
1243  token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
1244  max -= count;
1245 
1246  auto tail = producerListTail.load(std::memory_order_acquire);
1247  auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
1248  if (ptr == nullptr) {
1249  ptr = tail;
1250  }
1251  while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
1252  auto dequeued = ptr->dequeue_bulk(itemFirst, max);
1253  count += dequeued;
1254  if (dequeued != 0) {
1255  token.currentProducer = ptr;
1256  token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
1257  }
1258  if (dequeued == max) {
1259  break;
1260  }
1261  max -= dequeued;
1262  ptr = ptr->next_prod();
1263  if (ptr == nullptr) {
1264  ptr = tail;
1265  }
1266  }
1267  return count;
1268  }
1269 
1270 
1271 
1272  // Attempts to dequeue from a specific producer's inner queue.
1273  // If you happen to know which producer you want to dequeue from, this
1274  // is significantly faster than using the general-case try_dequeue methods.
1275  // Returns false if the producer's queue appeared empty at the time it
1276  // was checked (so, the queue is likely but not guaranteed to be empty).
1277  // Never allocates. Thread-safe.
1278  template<typename U>
1279  inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item)
1280  {
1281  return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item);
1282  }
1283 
1284  // Attempts to dequeue several elements from a specific producer's inner queue.
1285  // Returns the number of items actually dequeued.
1286  // If you happen to know which producer you want to dequeue from, this
1287  // is significantly faster than using the general-case try_dequeue methods.
1288  // Returns 0 if the producer's queue appeared empty at the time it
1289  // was checked (so, the queue is likely but not guaranteed to be empty).
1290  // Never allocates. Thread-safe.
1291  template<typename It>
1292  inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max)
1293  {
1294  return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max);
1295  }
1296 
1297 
1298  // Returns an estimate of the total number of elements currently in the queue. This
1299  // estimate is only accurate if the queue has completely stabilized before it is called
1300  // (i.e. all enqueue and dequeue operations have completed and their memory effects are
1301  // visible on the calling thread, and no further operations start while this method is
1302  // being called).
1303  // Thread-safe.
1304  size_t size_approx() const
1305  {
1306  size_t size = 0;
1307  for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
1308  size += ptr->size_approx();
1309  }
1310  return size;
1311  }
1312 
1313 
1314  // Returns true if the underlying atomic variables used by
1315  // the queue are lock-free (they should be on most platforms).
1316  // Thread-safe.
1317  static bool is_lock_free()
1318  {
1319  return
1326  }
1327 
1328 
1329 private:
1330  friend struct ProducerToken;
1331  friend struct ConsumerToken;
1333  friend struct ExplicitProducer;
1335  friend struct ImplicitProducer;
1336  friend class ConcurrentQueueTests;
1337 
1339 
1340 
1342  // Queue methods
1344 
1345  template<AllocationMode canAlloc, typename U>
1346  inline bool inner_enqueue(producer_token_t const& token, U&& element)
1347  {
1348  return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
1349  }
1350 
1351  template<AllocationMode canAlloc, typename U>
1352  inline bool inner_enqueue(U&& element)
1353  {
1354  auto producer = get_or_add_implicit_producer();
1355  return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
1356  }
1357 
1358  template<AllocationMode canAlloc, typename It>
1359  inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
1360  {
1361  return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
1362  }
1363 
1364  template<AllocationMode canAlloc, typename It>
1365  inline bool inner_enqueue_bulk(It itemFirst, size_t count)
1366  {
1367  auto producer = get_or_add_implicit_producer();
1368  return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
1369  }
1370 
1372  {
1373  // Ah, there's been a rotation, figure out where we should be!
1374  auto tail = producerListTail.load(std::memory_order_acquire);
1375  if (token.desiredProducer == nullptr && tail == nullptr) {
1376  return false;
1377  }
1378  auto prodCount = producerCount.load(std::memory_order_relaxed);
1379  auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed);
1380  if ((details::unlikely)(token.desiredProducer == nullptr)) {
1381  // Aha, first time we're dequeueing anything.
1382  // Figure out our local position
1383  // Note: offset is from start, not end, but we're traversing from end -- subtract from count first
1384  std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount);
1385  token.desiredProducer = tail;
1386  for (std::uint32_t i = 0; i != offset; ++i) {
1387  token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();
1388  if (token.desiredProducer == nullptr) {
1389  token.desiredProducer = tail;
1390  }
1391  }
1392  }
1393 
1394  std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;
1395  if (delta >= prodCount) {
1396  delta = delta % prodCount;
1397  }
1398  for (std::uint32_t i = 0; i != delta; ++i) {
1399  token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();
1400  if (token.desiredProducer == nullptr) {
1401  token.desiredProducer = tail;
1402  }
1403  }
1404 
1405  token.lastKnownGlobalOffset = globalOffset;
1406  token.currentProducer = token.desiredProducer;
1407  token.itemsConsumedFromCurrent = 0;
1408  return true;
1409  }
1410 
1411 
1413  // Free list
1415 
1416  template <typename N>
1418  {
1420 
1421  std::atomic<std::uint32_t> freeListRefs;
1422  std::atomic<N*> freeListNext;
1423  };
1424 
1425  // A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but
1426  // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly
1427  // speedy under low contention.
1428  template<typename N> // N must inherit FreeListNode or have the same fields (and initialization of them)
1429  struct FreeList
1430  {
1431  FreeList() : freeListHead(nullptr) { }
1432  FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); }
1434 
1437 
1438  inline void add(N* node)
1439  {
1440 #ifdef MCDBGQ_NOLOCKFREE_FREELIST
1441  debug::DebugLock lock(mutex);
1442 #endif
1443  // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to
1444  // set it using a fetch_add
1445  if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) {
1446  // Oh look! We were the last ones referencing this node, and we know
1447  // we want to add it to the free list, so let's do it!
1449  }
1450  }
1451 
1452  inline N* try_get()
1453  {
1454 #ifdef MCDBGQ_NOLOCKFREE_FREELIST
1455  debug::DebugLock lock(mutex);
1456 #endif
1457  auto head = freeListHead.load(std::memory_order_acquire);
1458  while (head != nullptr) {
1459  auto prevHead = head;
1460  auto refs = head->freeListRefs.load(std::memory_order_relaxed);
1461  if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) {
1462  head = freeListHead.load(std::memory_order_acquire);
1463  continue;
1464  }
1465 
1466  // Good, reference count has been incremented (it wasn't at zero), which means we can read the
1467  // next and not worry about it changing between now and the time we do the CAS
1468  auto next = head->freeListNext.load(std::memory_order_relaxed);
1469  if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) {
1470  // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no
1471  // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on).
1472  assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0);
1473 
1474  // Decrease refcount twice, once for our ref, and once for the list's ref
1475  head->freeListRefs.fetch_sub(2, std::memory_order_release);
1476  return head;
1477  }
1478 
1479  // OK, the head must have changed on us, but we still need to decrease the refcount we increased.
1480  // Note that we don't need to release any memory effects, but we do need to ensure that the reference
1481  // count decrement happens-after the CAS on the head.
1482  refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel);
1483  if (refs == SHOULD_BE_ON_FREELIST + 1) {
1484  add_knowing_refcount_is_zero(prevHead);
1485  }
1486  }
1487 
1488  return nullptr;
1489  }
1490 
1491  // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes)
1492  N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); }
1493 
1494  private:
1495  inline void add_knowing_refcount_is_zero(N* node)
1496  {
1497  // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run
1498  // only one copy of this method per node at a time, i.e. the single thread case), then we know
1499  // we can safely change the next pointer of the node; however, once the refcount is back above
1500  // zero, then other threads could increase it (happens under heavy contention, when the refcount
1501  // goes to zero in between a load and a refcount increment of a node in try_get, then back up to
1502  // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS
1503  // to add the node to the actual list fails, decrease the refcount and leave the add operation to
1504  // the next thread who puts the refcount back at zero (which could be us, hence the loop).
1505  auto head = freeListHead.load(std::memory_order_relaxed);
1506  while (true) {
1507  node->freeListNext.store(head, std::memory_order_relaxed);
1508  node->freeListRefs.store(1, std::memory_order_release);
1509  if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) {
1510  // Hmm, the add failed, but we can only try again when the refcount goes back to zero
1511  if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) {
1512  continue;
1513  }
1514  }
1515  return;
1516  }
1517  }
1518 
1519  private:
1520  // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention)
1521  std::atomic<N*> freeListHead;
1522 
      // Bit layout of freeListRefs: the low 31 bits (REFS_MASK) hold the reference
      // count; the top bit (SHOULD_BE_ON_FREELIST) flags a node whose re-add to the
      // list is pending until its refcount drains back to zero.
1523  static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
1524  static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;
1525 
1526 #ifdef MCDBGQ_NOLOCKFREE_FREELIST
1527  debug::DebugMutex mutex;
1528 #endif
1529  };
1530 
1531 
1533  // Block
1535 
1537 
      // A Block holds BLOCK_SIZE element slots plus the bookkeeping needed to tell
      // when every slot has been consumed (either per-slot emptyFlags or a single
      // elementsCompletelyDequeued counter, selected by the InnerQueueContext).
1538  struct Block
1539  {
      // NOTE(review): the constructor signature and member-initializer list
      // (original lines 1540-1541) are missing from this extracted listing.
1542  {
1543 #ifdef MCDBGQ_TRACKMEM
1544  owner = nullptr;
1545 #endif
1546  }
1547 
1548  template<InnerQueueContext context>
1549  inline bool is_empty() const
1550  {
      // NOTE(review): the compile-time branch selecting flag-based vs counter-based
      // tracking (original line 1551) is missing from this extracted listing.
1552  // Check flags
1553  for (size_t i = 0; i < BLOCK_SIZE; ++i) {
1554  if (!emptyFlags[i].load(std::memory_order_relaxed)) {
1555  return false;
1556  }
1557  }
1558 
1559  // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set
1560  std::atomic_thread_fence(std::memory_order_acquire);
1561  return true;
1562  }
1563  else {
1564  // Check counter
1565  if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) {
1566  std::atomic_thread_fence(std::memory_order_acquire);
1567  return true;
1568  }
1569  assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE);
1570  return false;
1571  }
1572  }
1573 
1574  // Returns true if the block is now empty (does not apply in explicit context)
1575  template<InnerQueueContext context>
      // NOTE(review): the set_empty(index_t i) signature and the compile-time
      // branch line (original lines 1576 and 1578) are missing from this listing.
1577  {
1579  // Set flag
      // Flags are stored in reverse order relative to the slot index (hence the
      // BLOCK_SIZE - 1 - ... mapping below).
1580  assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].load(std::memory_order_relaxed));
1581  emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(true, std::memory_order_release);
1582  return false;
1583  }
1584  else {
1585  // Increment counter
1586  auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release);
1587  assert(prevVal < BLOCK_SIZE);
1588  return prevVal == BLOCK_SIZE - 1;
1589  }
1590  }
1591 
1592  // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).
1593  // Returns true if the block is now empty (does not apply in explicit context).
1594  template<InnerQueueContext context>
      // NOTE(review): the set_many_empty(index_t i, size_t count) signature and the
      // compile-time branch line (original lines 1595 and 1597) are missing here.
1596  {
1598  // Set flags
      // Single release fence covers all the relaxed flag stores below.
1599  std::atomic_thread_fence(std::memory_order_release);
1600  i = BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1;
1601  for (size_t j = 0; j != count; ++j) {
1602  assert(!emptyFlags[i + j].load(std::memory_order_relaxed));
1603  emptyFlags[i + j].store(true, std::memory_order_relaxed);
1604  }
1605  return false;
1606  }
1607  else {
1608  // Increment counter
1609  auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release);
1610  assert(prevVal + count <= BLOCK_SIZE);
1611  return prevVal + count == BLOCK_SIZE;
1612  }
1613  }
1614 
1615  template<InnerQueueContext context>
1616  inline void set_all_empty()
1617  {
      // NOTE(review): the compile-time branch line (original line 1618) is missing
      // from this extracted listing.
1619  // Set all flags
1620  for (size_t i = 0; i != BLOCK_SIZE; ++i) {
1621  emptyFlags[i].store(true, std::memory_order_relaxed);
1622  }
1623  }
1624  else {
1625  // Reset counter
1626  elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);
1627  }
1628  }
1629 
1630  template<InnerQueueContext context>
1631  inline void reset_empty()
1632  {
      // NOTE(review): the compile-time branch line (original line 1633) is missing
      // from this extracted listing.
1634  // Reset flags
1635  for (size_t i = 0; i != BLOCK_SIZE; ++i) {
1636  emptyFlags[i].store(false, std::memory_order_relaxed);
1637  }
1638  }
1639  else {
1640  // Reset counter
1641  elementsCompletelyDequeued.store(0, std::memory_order_relaxed);
1642  }
1643  }
1644 
      // Slot access: masks the global index down to an offset within this block.
1645  inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
1646  inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
1647 
1648  private:
1649  static_assert(std::alignment_of<T>::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time");
      // NOTE(review): the raw element-storage member (original line 1650) is
      // missing from this extracted listing; operator[] above refers to it as
      // 'elements'.
1651  public:
      // NOTE(review): the 'Block* next;' member (original line 1652) is missing
      // here; the producer code below links blocks through it.
1653  std::atomic<size_t> elementsCompletelyDequeued;
      // NOTE(review): the emptyFlags array declaration (original line 1654) is
      // missing here; is_empty()/set_empty() above read and write it.
1655  public:
1656  std::atomic<std::uint32_t> freeListRefs;
1657  std::atomic<Block*> freeListNext;
1658  std::atomic<bool> shouldBeOnFreeList;
1659  bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'
1660 
1661 #ifdef MCDBGQ_TRACKMEM
1662  void* owner;
1663 #endif
1664  };
1665  static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");
1666 
1667 
1668 #ifdef MCDBGQ_TRACKMEM
1669 public:
      // Forward declaration only; MemStats itself is defined with the
      // MCDBGQ_TRACKMEM debug tooling elsewhere in the file.
1670  struct MemStats;
1671 private:
1672 #endif
1673 
1675  // Producer base
1677 
      // NOTE(review): the struct declaration line for ProducerBase (original line
      // 1678) is missing from this extracted listing.
1679  {
1680  ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) :
1681  tailIndex(0),
1682  headIndex(0),
      // NOTE(review): the 'dequeueOptimisticCount(0),' initializer (original line
      // 1683) is missing from this extracted listing.
1684  dequeueOvercommit(0),
1685  tailBlock(nullptr),
1686  isExplicit(isExplicit_),
1687  parent(parent_)
1688  {
1689  }
1690 
1691  virtual ~ProducerBase() { }
1692 
1693  template<typename U>
1694  inline bool dequeue(U& element)
1695  {
1696  if (isExplicit) {
1697  return static_cast<ExplicitProducer*>(this)->dequeue(element);
1698  }
1699  else {
1700  return static_cast<ImplicitProducer*>(this)->dequeue(element);
1701  }
1702  }
1703 
1704  template<typename It>
1705  inline size_t dequeue_bulk(It& itemFirst, size_t max)
1706  {
1707  if (isExplicit) {
1708  return static_cast<ExplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
1709  }
1710  else {
1711  return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
1712  }
1713  }
1714 
// Producers form a singly-linked list via the base's 'next' pointer; only
// ProducerBase nodes are ever linked, so the downcast is safe.
1715  inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); }
1716 
1717  inline size_t size_approx() const
1718  {
1719  auto tail = tailIndex.load(std::memory_order_relaxed);
1720  auto head = headIndex.load(std::memory_order_relaxed);
1721  return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0;
1722  }
1723 
1724  inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); }
1725  protected:
1726  std::atomic<index_t> tailIndex; // Where to enqueue to next
1727  std::atomic<index_t> headIndex; // Where to dequeue from next
1728 
      // Consumers optimistically claim slots via dequeueOptimisticCount and later
      // compensate failed claims through dequeueOvercommit (see dequeue below).
1729  std::atomic<index_t> dequeueOptimisticCount;
1730  std::atomic<index_t> dequeueOvercommit;
1731 
      // NOTE(review): the 'Block* tailBlock;' member declaration (original line
      // 1732) is missing from this extracted listing.
1733 
1734  public:
      // NOTE(review): the 'isExplicit' and 'parent' member declarations (original
      // lines 1735-1736) are missing here; the methods above use both.
1737 
1738  protected:
1739 #ifdef MCDBGQ_TRACKMEM
1740  friend struct MemStats;
1741 #endif
1742  };
1743 
1744 
1746  // Explicit queue
1748 
1750  {
      // Sizes the initial (circular) block index: at least EXPLICIT_INITIAL_INDEX_SIZE
      // entries, scaled up if the parent's initial block pool is large.
1751  explicit ExplicitProducer(ConcurrentQueue* parent_) :
1752  ProducerBase(parent_, true),
1753  blockIndex(nullptr),
      // NOTE(review): the 'pr_blockIndexSlotsUsed' and 'pr_blockIndexSize'
      // initializers (original lines 1754-1755) are missing from this listing.
1756  pr_blockIndexFront(0),
1757  pr_blockIndexEntries(nullptr),
1758  pr_blockIndexRaw(nullptr)
1759  {
1760  size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;
1761  if (poolBasedIndexSize > pr_blockIndexSize) {
1762  pr_blockIndexSize = poolBasedIndexSize;
1763  }
1764 
1765  new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
1766  }
1767 
      // NOTE(review): the '~ExplicitProducer()' signature line (original line
      // 1768) is missing from this extracted listing.
1769  {
1770  // Destruct any elements not yet dequeued.
1771  // Since we're in the destructor, we can assume all elements
1772  // are either completely dequeued or completely not (no halfways).
1773  if (this->tailBlock != nullptr) { // Note this means there must be a block index too
1774  // First find the block that's partially dequeued, if any
1775  Block* halfDequeuedBlock = nullptr;
1776  if ((this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
1777  // The head's not on a block boundary, meaning a block somewhere is partially dequeued
1778  // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary)
      // NOTE(review): the declaration of the loop variable 'i' (original line
      // 1779, which starts it at pr_blockIndexFront) is missing from this listing.
1780  while (details::circular_less_than<index_t>(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) {
1781  i = (i + 1) & (pr_blockIndexSize - 1);
1782  }
1783  assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed)));
1784  halfDequeuedBlock = pr_blockIndexEntries[i].block;
1785  }
1786 
1787  // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration)
1788  auto block = this->tailBlock;
1789  do {
1790  block = block->next;
1791  if (block->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
1792  continue;
1793  }
1794 
1795  size_t i = 0; // Offset into block
1796  if (block == halfDequeuedBlock) {
1797  i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
1798  }
1799 
1800  // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index
1801  auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
1802  while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) {
1803  (*block)[i++]->~T();
1804  }
1805  } while (block != this->tailBlock);
1806  }
1807 
1808  // Destroy all blocks that we own
1809  if (this->tailBlock != nullptr) {
1810  auto block = this->tailBlock;
1811  do {
1812  auto nextBlock = block->next;
1813  if (block->dynamicallyAllocated) {
1814  destroy(block);
1815  }
1816  else {
      // Pool-owned blocks are returned to the shared free list, not freed.
1817  this->parent->add_block_to_free_list(block);
1818  }
1819  block = nextBlock;
1820  } while (block != this->tailBlock);
1821  }
1822 
1823  // Destroy the block indices
1824  auto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw);
1825  while (header != nullptr) {
1826  auto prev = static_cast<BlockIndexHeader*>(header->prev);
1827  header->~BlockIndexHeader();
1828  (Traits::free)(header);
1829  header = prev;
1830  }
1831  }
1832 
      // Single-element enqueue: claims the next tail slot, starting (or re-using)
      // a block when the tail crosses a block boundary. Returns false when the
      // queue is full (per MAX_SUBQUEUE_SIZE) or allocation fails/is disallowed.
1833  template<AllocationMode allocMode, typename U>
1834  inline bool enqueue(U&& element)
1835  {
1836  index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
1837  index_t newTailIndex = 1 + currentTailIndex;
1838  if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
1839  // We reached the end of a block, start a new one
1840  auto startBlock = this->tailBlock;
1841  auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
1842  if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
1843  // We can re-use the block ahead of us, it's empty!
1844  this->tailBlock = this->tailBlock->next;
1845  this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
1846 
1847  // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the
1848  // last block from it first -- except instead of removing then adding, we can just overwrite).
1849  // Note that there must be a valid block index here, since even if allocation failed in the ctor,
1850  // it would have been re-attempted when adding the first block to the queue; since there is such
1851  // a block, a block index must have been successfully allocated.
1852  }
1853  else {
1854  // Whatever head value we see here is >= the last value we saw here (relatively),
1855  // and <= its current value. Since we have the most recent tail, the head must be
1856  // <= to it.
1857  auto head = this->headIndex.load(std::memory_order_relaxed);
1858  assert(!details::circular_less_than<index_t>(currentTailIndex, head));
1859  if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE)
      // NOTE(review): the second half of this condition (original line 1860, the
      // MAX_SUBQUEUE_SIZE check that also closes the 'if') is missing from this
      // extracted listing.
1861  // We can't enqueue in another block because there's not enough leeway -- the
1862  // tail could surpass the head by the time the block fills up! (Or we'll exceed
1863  // the size limit, if the second part of the condition was true.)
1864  return false;
1865  }
1866  // We're going to need a new block; check that the block index has room
      // NOTE(review): the guard testing pr_blockIndexRaw / index fullness
      // (original line 1867) is missing from this extracted listing.
1868  // Hmm, the circular block index is already full -- we'll need
1869  // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
1870  // the initial allocation failed in the constructor.
1871 
1872  MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
1873  return false;
1874  }
      // NOTE(review): the 'else if' attempting new_block_index(...) (original line
      // 1875) is missing from this extracted listing.
1876  return false;
1877  }
1878  }
1879 
1880  // Insert a new block in the circular linked list
1881  auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
1882  if (newBlock == nullptr) {
1883  return false;
1884  }
1885 #ifdef MCDBGQ_TRACKMEM
1886  newBlock->owner = this;
1887 #endif
1888  newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
1889  if (this->tailBlock == nullptr) {
1890  newBlock->next = newBlock;
1891  }
1892  else {
1893  newBlock->next = this->tailBlock->next;
1894  this->tailBlock->next = newBlock;
1895  }
1896  this->tailBlock = newBlock;
      // NOTE(review): the increment of pr_blockIndexSlotsUsed (original line
      // 1897) is missing from this extracted listing.
1898  }
1899 
1900  MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
1901  // The constructor may throw. We want the element not to appear in the queue in
1902  // that case (without corrupting the queue):
1903  MOODYCAMEL_TRY {
1904  new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
1905  }
1906  MOODYCAMEL_CATCH (...) {
1907  // Revert change to the current block, but leave the new block available
1908  // for next time
1909  pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
1910  this->tailBlock = startBlock == nullptr ? this->tailBlock : startBlock;
      // NOTE(review): the MOODYCAMEL_RETHROW statement (original line 1911) is
      // missing from this extracted listing.
1912  }
1913  }
1914  else {
1915  (void)startBlock;
1916  (void)originalBlockIndexSlotsUsed;
1917  }
1918 
1919  // Add block to block index
1920  auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
1921  entry.base = currentTailIndex;
1922  entry.block = this->tailBlock;
1923  blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
      // NOTE(review): the advance of pr_blockIndexFront (original line 1924) is
      // missing from this extracted listing.
1925 
1926  MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
1927  this->tailIndex.store(newTailIndex, std::memory_order_release);
1928  return true;
1929  }
1930  }
1931 
1932  // Enqueue
1933  new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
1934 
      // Publish the new element: the release store makes the construction above
      // visible to consumers that acquire tailIndex.
1935  this->tailIndex.store(newTailIndex, std::memory_order_release);
1936  return true;
1937  }
1938 
      // Single-element dequeue: optimistically claims a slot (dequeueOptimisticCount),
      // verifies against the published tail, and compensates failed claims through
      // dequeueOvercommit. Returns false when the queue appeared empty.
1939  template<typename U>
1940  bool dequeue(U& element)
1941  {
1942  auto tail = this->tailIndex.load(std::memory_order_relaxed);
1943  auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
1944  if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
1945  // Might be something to dequeue, let's give it a try
1946 
1947  // Note that this if is purely for performance purposes in the common case when the queue is
1948  // empty and the values are eventually consistent -- we may enter here spuriously.
1949 
1950  // Note that whatever the values of overcommit and tail are, they are not going to change (unless we
1951  // change them) and must be the same value at this point (inside the if) as when the if condition was
1952  // evaluated.
1953 
1954  // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below.
1955  // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in
1956  // the fetch_add below will result in a value at least as recent as that (and therefore at least as large).
1957  // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all
1958  // read-modify-write operations are guaranteed to work on the latest value in the modification order), but
1959  // unfortunately that can't be shown to be correct using only the C++11 standard.
1960  // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
1961  std::atomic_thread_fence(std::memory_order_acquire);
1962 
1963  // Increment optimistic counter, then check if it went over the boundary
1964  auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
1965 
1966  // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
1967  // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
1968  // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
1969  // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
1970  // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently)
1971  // overflow; in such a case, though, the logic still holds since the difference between the two is maintained.
1972 
1973  // Note that we reload tail here in case it changed; it will be the same value as before or greater, since
1974  // this load is sequenced after (happens after) the earlier load above. This is supported by read-read
1975  // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
1976  tail = this->tailIndex.load(std::memory_order_acquire);
1977  if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
1978  // Guaranteed to be at least one element to dequeue!
1979 
1980  // Get the index. Note that since there's guaranteed to be at least one element, this
1981  // will never exceed tail. We need to do an acquire-release fence here since it's possible
1982  // that whatever condition got us to this point was for an earlier enqueued element (that
1983  // we already see the memory effects for), but that by the time we increment somebody else
1984  // has incremented it, and we need to see the memory effects for *that* element, which is
1985  // in such a case is necessarily visible on the thread that incremented it in the first
1986  // place with the more current condition (they must have acquired a tail that is at least
1987  // as recent).
1988  auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
1989 
1990 
1991  // Determine which block the element is in
1992 
1993  auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
1994  auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
1995 
1996  // We need to be careful here about subtracting and dividing because of index wrap-around.
1997  // When an index wraps, we need to preserve the sign of the offset when dividing it by the
1998  // block size (in order to get a correct signed block count offset in all cases):
1999  auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
2000  auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
2001  auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE);
2002  auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;
2003 
2004  // Dequeue
2005  auto& el = *((*block)[index]);
2006  if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
2007  // Make sure the element is still fully dequeued and destroyed even if the assignment
2008  // throws
2009  struct Guard {
2010  Block* block;
2011  index_t index;
2012 
2013  ~Guard()
2014  {
2015  (*block)[index]->~T();
2016  block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
2017  }
2018  } guard = { block, index };
2019 
2020  element = std::move(el); // NOLINT
2021  }
2022  else {
2023  element = std::move(el); // NOLINT
2024  el.~T(); // NOLINT
2025  block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
2026  }
2027 
2028  return true;
2029  }
2030  else {
2031  // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
2032  this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
2033  }
2034  }
2035 
2036  return false;
2037  }
2038 
      // Bulk enqueue of 'count' elements from 'itemFirst'. Pre-allocates every
      // block needed (all-or-nothing), then constructs the elements block by
      // block; reverts completely if a copy constructor throws.
2039  template<AllocationMode allocMode, typename It>
2040  bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count)
2041  {
2042  // First, we need to make sure we have enough room to enqueue all of the elements;
2043  // this means pre-allocating blocks and putting them in the block index (but only if
2044  // all the allocations succeeded).
2045  index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
2046  auto startBlock = this->tailBlock;
2047  auto originalBlockIndexFront = pr_blockIndexFront;
2048  auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
2049 
2050  Block* firstAllocatedBlock = nullptr;
2051 
2052  // Figure out how many blocks we'll need to allocate, and do so
2053  size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
2054  index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
2055  if (blockBaseDiff > 0) {
2056  // Allocate as many blocks as possible from ahead
2057  while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
2058  blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
2059  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2060 
2061  this->tailBlock = this->tailBlock->next;
2062  firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
2063 
2064  auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
2065  entry.base = currentTailIndex;
2066  entry.block = this->tailBlock;
      // NOTE(review): the advance of pr_blockIndexFront (original line 2067) is
      // missing from this extracted listing.
2068  }
2069 
2070  // Now allocate as many blocks as necessary from the block pool
2071  while (blockBaseDiff > 0) {
2072  blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
2073  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2074 
2075  auto head = this->headIndex.load(std::memory_order_relaxed);
2076  assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2077  bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
2078  if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
2079  MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
2080  // Failed to allocate, undo changes (but keep injected blocks)
2081  pr_blockIndexFront = originalBlockIndexFront;
2082  pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2083  this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
2084  return false;
2085  }
2086  else if (full || !new_block_index(originalBlockIndexSlotsUsed)) {
2087  // Failed to allocate, undo changes (but keep injected blocks)
2088  pr_blockIndexFront = originalBlockIndexFront;
2089  pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2090  this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
2091  return false;
2092  }
2093 
2094  // pr_blockIndexFront is updated inside new_block_index, so we need to
2095  // update our fallback value too (since we keep the new index even if we
2096  // later fail)
2097  originalBlockIndexFront = originalBlockIndexSlotsUsed;
2098  }
2099 
2100  // Insert a new block in the circular linked list
2101  auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
2102  if (newBlock == nullptr) {
2103  pr_blockIndexFront = originalBlockIndexFront;
2104  pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2105  this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
2106  return false;
2107  }
2108 
2109 #ifdef MCDBGQ_TRACKMEM
2110  newBlock->owner = this;
2111 #endif
2112  newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>();
2113  if (this->tailBlock == nullptr) {
2114  newBlock->next = newBlock;
2115  }
2116  else {
2117  newBlock->next = this->tailBlock->next;
2118  this->tailBlock->next = newBlock;
2119  }
2120  this->tailBlock = newBlock;
2121  firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
2122 
      // NOTE(review): the increment of pr_blockIndexSlotsUsed (original line
      // 2123) is missing from this extracted listing.
2124 
2125  auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
2126  entry.base = currentTailIndex;
2127  entry.block = this->tailBlock;
      // NOTE(review): the advance of pr_blockIndexFront (original line 2128) is
      // missing from this extracted listing.
2129  }
2130 
2131  // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and
2132  // publish the new block index front
2133  auto block = firstAllocatedBlock;
2134  while (true) {
2135  block->ConcurrentQueue::Block::template reset_empty<explicit_context>();
2136  if (block == this->tailBlock) {
2137  break;
2138  }
2139  block = block->next;
2140  }
2141 
2142  MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
2143  blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
2144  }
2145  }
2146 
2147  // Enqueue, one block at a time
2148  index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
2149  currentTailIndex = startTailIndex;
2150  auto endBlock = this->tailBlock;
2151  this->tailBlock = startBlock;
2152  assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
2153  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
2154  this->tailBlock = firstAllocatedBlock;
2155  }
2156  while (true) {
2157  index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2158  if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
2159  stopIndex = newTailIndex;
2160  }
2161  MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
2162  while (currentTailIndex != stopIndex) {
2163  new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
2164  }
2165  }
2166  else {
2167  MOODYCAMEL_TRY {
2168  while (currentTailIndex != stopIndex) {
2169  // Must use copy constructor even if move constructor is available
2170  // because we may have to revert if there's an exception.
2171  // Sorry about the horrible templated next line, but it was the only way
2172  // to disable moving *at compile time*, which is important because a type
2173  // may only define a (noexcept) move constructor, and so calls to the
2174  // cctor will not compile, even if they are in an if branch that will never
2175  // be executed
2176  new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
2177  ++currentTailIndex;
2178  ++itemFirst;
2179  }
2180  }
2181  MOODYCAMEL_CATCH (...) {
2182  // Oh dear, an exception's been thrown -- destroy the elements that
2183  // were enqueued so far and revert the entire bulk operation (we'll keep
2184  // any allocated blocks in our linked list for later, though).
2185  auto constructedStopIndex = currentTailIndex;
2186  auto lastBlockEnqueued = this->tailBlock;
2187 
2188  pr_blockIndexFront = originalBlockIndexFront;
2189  pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2190  this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
2191 
      // NOTE(review): a line is missing here (original line 2192, presumably the
      // guard skipping destruction for trivially destructible T).
2193  auto block = startBlock;
2194  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
2195  block = firstAllocatedBlock;
2196  }
2197  currentTailIndex = startTailIndex;
2198  while (true) {
2199  stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2200  if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
2201  stopIndex = constructedStopIndex;
2202  }
2203  while (currentTailIndex != stopIndex) {
2204  (*block)[currentTailIndex++]->~T();
2205  }
2206  if (block == lastBlockEnqueued) {
2207  break;
2208  }
2209  block = block->next;
2210  }
2211  }
      // NOTE(review): the MOODYCAMEL_RETHROW statement (original line 2212) is
      // missing from this extracted listing.
2213  }
2214  }
2215 
2216  if (this->tailBlock == endBlock) {
2217  assert(currentTailIndex == newTailIndex);
2218  break;
2219  }
2220  this->tailBlock = this->tailBlock->next;
2221  }
2222 
2223  MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
2224  if (firstAllocatedBlock != nullptr)
2225  blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
2226  }
2227 
      // Publish all newly constructed elements at once.
2228  this->tailIndex.store(newTailIndex, std::memory_order_release);
2229  return true;
2230  }
2231 
      // Bulk dequeue of up to 'max' elements into 'itemFirst'. Uses the same
      // optimistic-count/overcommit scheme as dequeue(), claiming actualCount
      // slots at once and walking the block index to drain them.
2232  template<typename It>
2233  size_t dequeue_bulk(It& itemFirst, size_t max)
2234  {
2235  auto tail = this->tailIndex.load(std::memory_order_relaxed);
2236  auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
2237  auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
2238  if (details::circular_less_than<size_t>(0, desiredCount)) {
2239  desiredCount = desiredCount < max ? desiredCount : max;
2240  std::atomic_thread_fence(std::memory_order_acquire);
2241 
2242  auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
2243 
2244  tail = this->tailIndex.load(std::memory_order_acquire);
2245  auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
2246  if (details::circular_less_than<size_t>(0, actualCount)) {
2247  actualCount = desiredCount < actualCount ? desiredCount : actualCount;
2248  if (actualCount < desiredCount) {
      // Give back the slots we claimed but cannot actually consume.
2249  this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
2250  }
2251 
2252  // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
2253  // will never exceed tail.
2254  auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
2255 
2256  // Determine which block the first element is in
2257  auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
2258  auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
2259 
2260  auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
2261  auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
2262  auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE);
2263  auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
2264 
2265  // Iterate the blocks and dequeue
2266  auto index = firstIndex;
2267  do {
2268  auto firstIndexInBlock = index;
2269  index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2270  endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
2271  auto block = localBlockIndex->entries[indexIndex].block;
2272  if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
2273  while (index != endIndex) {
2274  auto& el = *((*block)[index]);
2275  *itemFirst++ = std::move(el);
2276  el.~T();
2277  ++index;
2278  }
2279  }
2280  else {
2281  MOODYCAMEL_TRY {
2282  while (index != endIndex) {
2283  auto& el = *((*block)[index]);
2284  *itemFirst = std::move(el);
2285  ++itemFirst;
2286  el.~T();
2287  ++index;
2288  }
2289  }
2290  MOODYCAMEL_CATCH (...) {
2291  // It's too late to revert the dequeue, but we can make sure that all
2292  // the dequeued objects are properly destroyed and the block index
2293  // (and empty count) are properly updated before we propagate the exception
2294  do {
2295  block = localBlockIndex->entries[indexIndex].block;
2296  while (index != endIndex) {
2297  (*block)[index++]->~T();
2298  }
2299  block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
2300  indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
2301 
2302  firstIndexInBlock = index;
2303  endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2304  endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
2305  } while (index != firstIndex + actualCount);
2306 
      // NOTE(review): the MOODYCAMEL_RETHROW statement (original line 2307) is
      // missing from this extracted listing.
2308  }
2309  }
2310  block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
2311  indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
2312  } while (index != firstIndex + actualCount);
2313 
2314  return actualCount;
2315  }
2316  else {
2317  // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
2318  this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
2319  }
2320  }
2321 
2322  return 0;
2323  }
2324 
2325  private:
2327  {
2330  };
2331 
2333  {
2334  size_t size;
2335  std::atomic<size_t> front; // Current slot (not next, like pr_blockIndexFront)
2337  void* prev;
2338  };
2339 
2340 
// Doubles the capacity of this producer's circular block index and publishes
// the new index to consumer threads. Called only by the owning producer
// thread. Returns false (leaving the previous index fully intact) if the
// allocation fails, allowing the caller to retry gracefully later.
bool new_block_index(size_t numberOfFilledSlotsToExpose)
{
	auto prevBlockSizeMask = pr_blockIndexSize - 1;

	// Create the new block
	pr_blockIndexSize <<= 1;
	auto newRawPtr = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize));
	if (newRawPtr == nullptr) {
		pr_blockIndexSize >>= 1; // Reset to allow graceful retry
		return false;
	}

	// Entry array lives just past the header, aligned for BlockIndexEntry
	auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader)));

	// Copy in all the old indices, if any
	size_t j = 0;
	if (pr_blockIndexSlotsUsed != 0) {
		// Start from the oldest used slot so circular order is preserved
		auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask;
		do {
			newBlockIndexEntries[j++] = pr_blockIndexEntries[i];
			i = (i + 1) & prevBlockSizeMask;
		} while (i != pr_blockIndexFront);
	}

	// Update everything
	auto header = new (newRawPtr) BlockIndexHeader;
	header->size = pr_blockIndexSize;
	header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed);
	header->entries = newBlockIndexEntries;
	header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later

	pr_blockIndexFront = j;
	pr_blockIndexEntries = newBlockIndexEntries;
	pr_blockIndexRaw = newRawPtr;
	// Release store: consumers that acquire-load blockIndex see a fully
	// initialized header and entry array
	blockIndex.store(header, std::memory_order_release);

	return true;
}
2379 
2380  private:
2381  std::atomic<BlockIndexHeader*> blockIndex;
2382 
2383  // To be used by producer only -- consumer must use the ones in referenced by blockIndex
2386  size_t pr_blockIndexFront; // Next slot (not current)
2389 
2390 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
2391  public:
2392  ExplicitProducer* nextExplicitProducer;
2393  private:
2394 #endif
2395 
2396 #ifdef MCDBGQ_TRACKMEM
2397  friend struct MemStats;
2398 #endif
2399  };
2400 
2401 
2403  // Implicit queue
2405 
2407  {
2409  ProducerBase(parent_, false),
2411  blockIndex(nullptr)
2412  {
2413  new_block_index();
2414  }
2415 
2417  {
2418  // Note that since we're in the destructor we can assume that all enqueue/dequeue operations
2419  // completed already; this means that all undequeued elements are placed contiguously across
2420  // contiguous blocks, and that only the first and last remaining blocks can be only partially
2421  // empty (all other remaining blocks must be completely full).
2422 
2423 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
2424  // Unregister ourselves for thread termination notification
2425  if (!this->inactive.load(std::memory_order_relaxed)) {
2426  details::ThreadExitNotifier::unsubscribe(&threadExitListener);
2427  }
2428 #endif
2429 
2430  // Destroy all remaining elements!
2431  auto tail = this->tailIndex.load(std::memory_order_relaxed);
2432  auto index = this->headIndex.load(std::memory_order_relaxed);
2433  Block* block = nullptr;
2434  assert(index == tail || details::circular_less_than(index, tail));
2435  bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed
2436  while (index != tail) {
2437  if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || block == nullptr) {
2438  if (block != nullptr) {
2439  // Free the old block
2440  this->parent->add_block_to_free_list(block);
2441  }
2442 
2443  block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
2444  }
2445 
2446  ((*block)[index])->~T();
2447  ++index;
2448  }
2449  // Even if the queue is empty, there's still one block that's not on the free list
2450  // (unless the head index reached the end of it, in which case the tail will be poised
2451  // to create a new block).
2452  if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
2453  this->parent->add_block_to_free_list(this->tailBlock);
2454  }
2455 
2456  // Destroy block index
2457  auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
2458  if (localBlockIndex != nullptr) {
2459  for (size_t i = 0; i != localBlockIndex->capacity; ++i) {
2460  localBlockIndex->index[i]->~BlockIndexEntry();
2461  }
2462  do {
2463  auto prev = localBlockIndex->prev;
2464  localBlockIndex->~BlockIndexHeader();
2465  (Traits::free)(localBlockIndex);
2466  localBlockIndex = prev;
2467  } while (localBlockIndex != nullptr);
2468  }
2469  }
2470 
2471  template<AllocationMode allocMode, typename U>
2472  inline bool enqueue(U&& element)
2473  {
2474  index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
2475  index_t newTailIndex = 1 + currentTailIndex;
2476  if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
2477  // We reached the end of a block, start a new one
2478  auto head = this->headIndex.load(std::memory_order_relaxed);
2479  assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2480  if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
2481  return false;
2482  }
2483 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2484  debug::DebugLock lock(mutex);
2485 #endif
2486  // Find out where we'll be inserting this block in the block index
2487  BlockIndexEntry* idxEntry;
2488  if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {
2489  return false;
2490  }
2491 
2492  // Get ahold of a new block
2493  auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
2494  if (newBlock == nullptr) {
2496  idxEntry->value.store(nullptr, std::memory_order_relaxed);
2497  return false;
2498  }
2499 #ifdef MCDBGQ_TRACKMEM
2500  newBlock->owner = this;
2501 #endif
2502  newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
2503 
2504  MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
2505  // May throw, try to insert now before we publish the fact that we have this new block
2506  MOODYCAMEL_TRY {
2507  new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
2508  }
2509  MOODYCAMEL_CATCH (...) {
2511  idxEntry->value.store(nullptr, std::memory_order_relaxed);
2512  this->parent->add_block_to_free_list(newBlock);
2514  }
2515  }
2516 
2517  // Insert the new block into the index
2518  idxEntry->value.store(newBlock, std::memory_order_relaxed);
2519 
2520  this->tailBlock = newBlock;
2521 
2522  MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
2523  this->tailIndex.store(newTailIndex, std::memory_order_release);
2524  return true;
2525  }
2526  }
2527 
2528  // Enqueue
2529  new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
2530 
2531  this->tailIndex.store(newTailIndex, std::memory_order_release);
2532  return true;
2533  }
2534 
2535  template<typename U>
2536  bool dequeue(U& element)
2537  {
2538  // See ExplicitProducer::dequeue for rationale and explanation
2539  index_t tail = this->tailIndex.load(std::memory_order_relaxed);
2540  index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
2541  if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
2542  std::atomic_thread_fence(std::memory_order_acquire);
2543 
2544  index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
2545  tail = this->tailIndex.load(std::memory_order_acquire);
2546  if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
2547  index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
2548 
2549  // Determine which block the element is in
2550  auto entry = get_block_index_entry_for_index(index);
2551 
2552  // Dequeue
2553  auto block = entry->value.load(std::memory_order_relaxed);
2554  auto& el = *((*block)[index]);
2555 
2556  if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
2557 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2558  // Note: Acquiring the mutex with every dequeue instead of only when a block
2559  // is released is very sub-optimal, but it is, after all, purely debug code.
2560  debug::DebugLock lock(producer->mutex);
2561 #endif
2562  struct Guard {
2563  Block* block;
2564  index_t index;
2565  BlockIndexEntry* entry;
2567 
2568  ~Guard()
2569  {
2570  (*block)[index]->~T();
2571  if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
2572  entry->value.store(nullptr, std::memory_order_relaxed);
2574  }
2575  }
2576  } guard = { block, index, entry, this->parent };
2577 
2578  element = std::move(el); // NOLINT
2579  }
2580  else {
2581  element = std::move(el); // NOLINT
2582  el.~T(); // NOLINT
2583 
2584  if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
2585  {
2586 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2587  debug::DebugLock lock(mutex);
2588 #endif
2589  // Add the block back into the global free pool (and remove from block index)
2590  entry->value.store(nullptr, std::memory_order_relaxed);
2591  }
2592  this->parent->add_block_to_free_list(block); // releases the above store
2593  }
2594  }
2595 
2596  return true;
2597  }
2598  else {
2599  this->dequeueOvercommit.fetch_add(1, std::memory_order_release);
2600  }
2601  }
2602 
2603  return false;
2604  }
2605 
2606 #ifdef _MSC_VER
2607 #pragma warning(push)
2608 #pragma warning(disable: 4706) // assignment within conditional expression
2609 #endif
2610  template<AllocationMode allocMode, typename It>
2611  bool enqueue_bulk(It itemFirst, size_t count)
2612  {
2613  // First, we need to make sure we have enough room to enqueue all of the elements;
2614  // this means pre-allocating blocks and putting them in the block index (but only if
2615  // all the allocations succeeded).
2616 
2617  // Note that the tailBlock we start off with may not be owned by us any more;
2618  // this happens if it was filled up exactly to the top (setting tailIndex to
2619  // the first index of the next block which is not yet allocated), then dequeued
2620  // completely (putting it on the free list) before we enqueue again.
2621 
2622  index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
2623  auto startBlock = this->tailBlock;
2624  Block* firstAllocatedBlock = nullptr;
2625  auto endBlock = this->tailBlock;
2626 
2627  // Figure out how many blocks we'll need to allocate, and do so
2628  size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
2629  index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
2630  if (blockBaseDiff > 0) {
2631 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2632  debug::DebugLock lock(mutex);
2633 #endif
2634  do {
2635  blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
2636  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2637 
2638  // Find out where we'll be inserting this block in the block index
2639  BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell
2640  Block* newBlock;
2641  bool indexInserted = false;
2642  auto head = this->headIndex.load(std::memory_order_relaxed);
2643  assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2644  bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
2645 
2646  if (full || !(indexInserted = insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>()) == nullptr) {
2647  // Index allocation or block allocation failed; revert any other allocations
2648  // and index insertions done so far for this operation
2649  if (indexInserted) {
2651  idxEntry->value.store(nullptr, std::memory_order_relaxed);
2652  }
2653  currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
2654  for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
2655  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2656  idxEntry = get_block_index_entry_for_index(currentTailIndex);
2657  idxEntry->value.store(nullptr, std::memory_order_relaxed);
2659  }
2660  this->parent->add_blocks_to_free_list(firstAllocatedBlock);
2661  this->tailBlock = startBlock;
2662 
2663  return false;
2664  }
2665 
2666 #ifdef MCDBGQ_TRACKMEM
2667  newBlock->owner = this;
2668 #endif
2669  newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
2670  newBlock->next = nullptr;
2671 
2672  // Insert the new block into the index
2673  idxEntry->value.store(newBlock, std::memory_order_relaxed);
2674 
2675  // Store the chain of blocks so that we can undo if later allocations fail,
2676  // and so that we can find the blocks when we do the actual enqueueing
2677  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) {
2678  assert(this->tailBlock != nullptr);
2679  this->tailBlock->next = newBlock;
2680  }
2681  this->tailBlock = newBlock;
2682  endBlock = newBlock;
2683  firstAllocatedBlock = firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock;
2684  } while (blockBaseDiff > 0);
2685  }
2686 
2687  // Enqueue, one block at a time
2688  index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
2689  currentTailIndex = startTailIndex;
2690  this->tailBlock = startBlock;
2691  assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
2692  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
2693  this->tailBlock = firstAllocatedBlock;
2694  }
2695  while (true) {
2696  index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2697  if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
2698  stopIndex = newTailIndex;
2699  }
2700  MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
2701  while (currentTailIndex != stopIndex) {
2702  new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
2703  }
2704  }
2705  else {
2706  MOODYCAMEL_TRY {
2707  while (currentTailIndex != stopIndex) {
2708  new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
2709  ++currentTailIndex;
2710  ++itemFirst;
2711  }
2712  }
2713  MOODYCAMEL_CATCH (...) {
2714  auto constructedStopIndex = currentTailIndex;
2715  auto lastBlockEnqueued = this->tailBlock;
2716 
2718  auto block = startBlock;
2719  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
2720  block = firstAllocatedBlock;
2721  }
2722  currentTailIndex = startTailIndex;
2723  while (true) {
2724  stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2725  if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
2726  stopIndex = constructedStopIndex;
2727  }
2728  while (currentTailIndex != stopIndex) {
2729  (*block)[currentTailIndex++]->~T();
2730  }
2731  if (block == lastBlockEnqueued) {
2732  break;
2733  }
2734  block = block->next;
2735  }
2736  }
2737 
2738  currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
2739  for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
2740  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2741  auto idxEntry = get_block_index_entry_for_index(currentTailIndex);
2742  idxEntry->value.store(nullptr, std::memory_order_relaxed);
2744  }
2745  this->parent->add_blocks_to_free_list(firstAllocatedBlock);
2746  this->tailBlock = startBlock;
2748  }
2749  }
2750 
2751  if (this->tailBlock == endBlock) {
2752  assert(currentTailIndex == newTailIndex);
2753  break;
2754  }
2755  this->tailBlock = this->tailBlock->next;
2756  }
2757  this->tailIndex.store(newTailIndex, std::memory_order_release);
2758  return true;
2759  }
2760 #ifdef _MSC_VER
2761 #pragma warning(pop)
2762 #endif
2763 
2764  template<typename It>
2765  size_t dequeue_bulk(It& itemFirst, size_t max)
2766  {
2767  auto tail = this->tailIndex.load(std::memory_order_relaxed);
2768  auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
2769  auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
2770  if (details::circular_less_than<size_t>(0, desiredCount)) {
2771  desiredCount = desiredCount < max ? desiredCount : max;
2772  std::atomic_thread_fence(std::memory_order_acquire);
2773 
2774  auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
2775 
2776  tail = this->tailIndex.load(std::memory_order_acquire);
2777  auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
2778  if (details::circular_less_than<size_t>(0, actualCount)) {
2779  actualCount = desiredCount < actualCount ? desiredCount : actualCount;
2780  if (actualCount < desiredCount) {
2781  this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
2782  }
2783 
2784  // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
2785  // will never exceed tail.
2786  auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
2787 
2788  // Iterate the blocks and dequeue
2789  auto index = firstIndex;
2790  BlockIndexHeader* localBlockIndex;
2791  auto indexIndex = get_block_index_index_for_index(index, localBlockIndex);
2792  do {
2793  auto blockStartIndex = index;
2794  index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2795  endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
2796 
2797  auto entry = localBlockIndex->index[indexIndex];
2798  auto block = entry->value.load(std::memory_order_relaxed);
2799  if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
2800  while (index != endIndex) {
2801  auto& el = *((*block)[index]);
2802  *itemFirst++ = std::move(el);
2803  el.~T();
2804  ++index;
2805  }
2806  }
2807  else {
2808  MOODYCAMEL_TRY {
2809  while (index != endIndex) {
2810  auto& el = *((*block)[index]);
2811  *itemFirst = std::move(el);
2812  ++itemFirst;
2813  el.~T();
2814  ++index;
2815  }
2816  }
2817  MOODYCAMEL_CATCH (...) {
2818  do {
2819  entry = localBlockIndex->index[indexIndex];
2820  block = entry->value.load(std::memory_order_relaxed);
2821  while (index != endIndex) {
2822  (*block)[index++]->~T();
2823  }
2824 
2825  if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
2826 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2827  debug::DebugLock lock(mutex);
2828 #endif
2829  entry->value.store(nullptr, std::memory_order_relaxed);
2830  this->parent->add_block_to_free_list(block);
2831  }
2832  indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
2833 
2834  blockStartIndex = index;
2835  endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2836  endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
2837  } while (index != firstIndex + actualCount);
2838 
2840  }
2841  }
2842  if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
2843  {
2844 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2845  debug::DebugLock lock(mutex);
2846 #endif
2847  // Note that the set_many_empty above did a release, meaning that anybody who acquires the block
2848  // we're about to free can use it safely since our writes (and reads!) will have happened-before then.
2849  entry->value.store(nullptr, std::memory_order_relaxed);
2850  }
2851  this->parent->add_block_to_free_list(block); // releases the above store
2852  }
2853  indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
2854  } while (index != firstIndex + actualCount);
2855 
2856  return actualCount;
2857  }
2858  else {
2859  this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
2860  }
2861  }
2862 
2863  return 0;
2864  }
2865 
2866  private:
2867  // The block size must be > 1, so any number with the low bit set is an invalid block base index
2868  static const index_t INVALID_BLOCK_BASE = 1;
2869 
2871  {
2872  std::atomic<index_t> key;
2873  std::atomic<Block*> value;
2874  };
2875 
2877  {
2878  size_t capacity;
2879  std::atomic<size_t> tail;
2883  };
2884 
	// Claims the next slot in the block index for a block starting at
	// `blockStartIndex`, growing the index if it is full (and allowed to
	// allocate). On success, `idxEntry` points at the claimed slot and the
	// new tail has been published with release semantics. Producer-only.
	template<AllocationMode allocMode>
	inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex)
	{
		auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK
		if (localBlockIndex == nullptr) {
			return false; // this can happen if new_block_index failed in the constructor
		}
		size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
		idxEntry = localBlockIndex->index[newTail];
		// Slot is free if it was never used or its block has been released
		if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||
			idxEntry->value.load(std::memory_order_relaxed) == nullptr) {

			idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
			localBlockIndex->tail.store(newTail, std::memory_order_release);
			return true;
		}

		// No room in the old block index, try to allocate another one!
		MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
			return false;
		}
		else if (!new_block_index()) {
			return false;
		}
		// Retry in the freshly grown index; the next slot is guaranteed free
		localBlockIndex = blockIndex.load(std::memory_order_relaxed);
		newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
		idxEntry = localBlockIndex->index[newTail];
		assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE);
		idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
		localBlockIndex->tail.store(newTail, std::memory_order_release);
		return true;
	}
2917 
2919  {
2920  auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
2921  localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed);
2922  }
2923 
2925  {
2926  BlockIndexHeader* localBlockIndex;
2927  auto idx = get_block_index_index_for_index(index, localBlockIndex);
2928  return localBlockIndex->index[idx];
2929  }
2930 
	// Maps an element index to its slot position inside the (circular) block
	// index, also returning the index header via `localBlockIndex`. Works by
	// computing the signed block-count distance from the tail entry's base.
	inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const
	{
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
		debug::DebugLock lock(mutex);
#endif
		// Round down to the base index of the block containing `index`
		index &= ~static_cast<index_t>(BLOCK_SIZE - 1);
		localBlockIndex = blockIndex.load(std::memory_order_acquire);
		auto tail = localBlockIndex->tail.load(std::memory_order_acquire);
		auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);
		assert(tailBase != INVALID_BLOCK_BASE);
		// Note: Must use division instead of shift because the index may wrap around, causing a negative
		// offset, whose negativity we want to preserve
		auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / BLOCK_SIZE);
		size_t idx = (tail + offset) & (localBlockIndex->capacity - 1);
		assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr);
		return idx;
	}
2948 
2950  {
2951  auto prev = blockIndex.load(std::memory_order_relaxed);
2952  size_t prevCapacity = prev == nullptr ? 0 : prev->capacity;
2953  auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity;
2954  auto raw = static_cast<char*>((Traits::malloc)(
2955  sizeof(BlockIndexHeader) +
2956  std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * entryCount +
2958  if (raw == nullptr) {
2959  return false;
2960  }
2961 
2962  auto header = new (raw) BlockIndexHeader;
2963  auto entries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));
2964  auto index = reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(reinterpret_cast<char*>(entries) + sizeof(BlockIndexEntry) * entryCount));
2965  if (prev != nullptr) {
2966  auto prevTail = prev->tail.load(std::memory_order_relaxed);
2967  auto prevPos = prevTail;
2968  size_t i = 0;
2969  do {
2970  prevPos = (prevPos + 1) & (prev->capacity - 1);
2971  index[i++] = prev->index[prevPos];
2972  } while (prevPos != prevTail);
2973  assert(i == prevCapacity);
2974  }
2975  for (size_t i = 0; i != entryCount; ++i) {
2976  new (entries + i) BlockIndexEntry;
2977  entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed);
2978  index[prevCapacity + i] = entries + i;
2979  }
2980  header->prev = prev;
2981  header->entries = entries;
2982  header->index = index;
2983  header->capacity = nextBlockIndexCapacity;
2984  header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed);
2985 
2986  blockIndex.store(header, std::memory_order_release);
2987 
2988  nextBlockIndexCapacity <<= 1;
2989 
2990  return true;
2991  }
2992 
2993  private:
2995  std::atomic<BlockIndexHeader*> blockIndex;
2996 
2997 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
2998  public:
2999  details::ThreadExitListener threadExitListener;
3000  private:
3001 #endif
3002 
3003 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
3004  public:
3005  ImplicitProducer* nextImplicitProducer;
3006  private:
3007 #endif
3008 
3009 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
3010  mutable debug::DebugMutex mutex;
3011 #endif
3012 #ifdef MCDBGQ_TRACKMEM
3013  friend struct MemStats;
3014 #endif
3015  };
3016 
3017 
3019  // Block pool manipulation
3021 
3022  void populate_initial_block_list(size_t blockCount)
3023  {
3024  initialBlockPoolSize = blockCount;
3025  if (initialBlockPoolSize == 0) {
3026  initialBlockPool = nullptr;
3027  return;
3028  }
3029 
3030  initialBlockPool = create_array<Block>(blockCount);
3031  if (initialBlockPool == nullptr) {
3033  }
3034  for (size_t i = 0; i < initialBlockPoolSize; ++i) {
3036  }
3037  }
3038 
3040  {
3041  if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) {
3042  return nullptr;
3043  }
3044 
3045  auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);
3046 
3047  return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr;
3048  }
3049 
	// Returns a single block to the global lock-free free list so any
	// producer can reuse it.
	inline void add_block_to_free_list(Block* block)
	{
#ifdef MCDBGQ_TRACKMEM
		block->owner = nullptr;
#endif
		freeList.add(block);
	}
3057 
3058  inline void add_blocks_to_free_list(Block* block)
3059  {
3060  while (block != nullptr) {
3061  auto next = block->next;
3062  add_block_to_free_list(block);
3063  block = next;
3064  }
3065  }
3066 
3068  {
3069  return freeList.try_get();
3070  }
3071 
3072  // Gets a free block from one of the memory pools, or allocates a new one (if applicable)
3073  template<AllocationMode canAlloc>
3075  {
3076  auto block = try_get_block_from_initial_pool();
3077  if (block != nullptr) {
3078  return block;
3079  }
3080 
3081  block = try_get_block_from_free_list();
3082  if (block != nullptr) {
3083  return block;
3084  }
3085 
3086  MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) {
3087  return create<Block>();
3088  }
3089  else {
3090  return nullptr;
3091  }
3092  }
3093 
3094 
3095 #ifdef MCDBGQ_TRACKMEM
3096  public:
// Debug-only memory accounting (compiled in only under MCDBGQ_TRACKMEM).
// Aggregates block/producer counts and byte totals by walking the queue's
// internal structures. getFor() is NOT thread-safe: it reads producer state
// with relaxed loads and no coordination with concurrent enqueues/dequeues,
// so the numbers are approximate snapshots at best.
3097  struct MemStats {
3098  size_t allocatedBlocks;
3099  size_t usedBlocks;
3100  size_t freeBlocks;
3101  size_t ownedBlocksExplicit;
3102  size_t ownedBlocksImplicit;
3103  size_t implicitProducers;
3104  size_t explicitProducers;
3105  size_t elementsEnqueued;
3106  size_t blockClassBytes;
3107  size_t queueClassBytes;
3108  size_t implicitBlockIndexBytes;
3109  size_t explicitBlockIndexBytes;
3110 
3111  friend class ConcurrentQueue;
3112 
3113  private:
3114  static MemStats getFor(ConcurrentQueue* q)
3115  {
3116  MemStats stats = { 0 };
3117 
3118  stats.elementsEnqueued = q->size_approx();
3119 
// Count blocks currently sitting on the global free list.
3120  auto block = q->freeList.head_unsafe();
3121  while (block != nullptr) {
3122  ++stats.allocatedBlocks;
3123  ++stats.freeBlocks;
3124  block = block->freeListNext.load(std::memory_order_relaxed);
3125  }
3126 
// Walk every registered producer; dynamic_cast distinguishes implicit from
// explicit producers (ProducerBase must therefore be polymorphic here).
3127  for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
3128  bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr;
3129  stats.implicitProducers += implicit ? 1 : 0;
3130  stats.explicitProducers += implicit ? 0 : 1;
3131 
3132  if (implicit) {
3133  auto prod = static_cast<ImplicitProducer*>(ptr);
3134  stats.queueClassBytes += sizeof(ImplicitProducer);
3135  auto head = prod->headIndex.load(std::memory_order_relaxed);
3136  auto tail = prod->tailIndex.load(std::memory_order_relaxed);
3137  auto hash = prod->blockIndex.load(std::memory_order_relaxed);
3138  if (hash != nullptr) {
3139  for (size_t i = 0; i != hash->capacity; ++i) {
3140  if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) {
3141  ++stats.allocatedBlocks;
3142  ++stats.ownedBlocksImplicit;
3143  }
3144  }
3145  stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry);
3146  for (; hash != nullptr; hash = hash->prev) {
3147  stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*);
3148  }
3149  }
// One "used" block per BLOCK_SIZE span between head and tail (circular compare).
3150  for (; details::circular_less_than<index_t>(head, tail); head += BLOCK_SIZE) {
3151  //auto block = prod->get_block_index_entry_for_index(head);
3152  ++stats.usedBlocks;
3153  }
3154  }
3155  else {
3156  auto prod = static_cast<ExplicitProducer*>(ptr);
3157  stats.queueClassBytes += sizeof(ExplicitProducer);
3158  auto tailBlock = prod->tailBlock;
3159  bool wasNonEmpty = false;
3160  if (tailBlock != nullptr) {
// The explicit producer's blocks form a circular list starting at tailBlock.
3161  auto block = tailBlock;
3162  do {
3163  ++stats.allocatedBlocks;
3164  if (!block->ConcurrentQueue::Block::template is_empty<explicit_context>() || wasNonEmpty) {
3165  ++stats.usedBlocks;
3166  wasNonEmpty = wasNonEmpty || block != tailBlock;
3167  }
3168  ++stats.ownedBlocksExplicit;
3169  block = block->next;
3170  } while (block != tailBlock);
3171  }
3172  auto index = prod->blockIndex.load(std::memory_order_relaxed);
3173  while (index != nullptr) {
3174  stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry);
3175  index = static_cast<typename ExplicitProducer::BlockIndexHeader*>(index->prev);
3176  }
3177  }
3178  }
3179 
// Blocks never handed out from the initial pool count as allocated-and-free.
3180  auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed);
3181  stats.allocatedBlocks += freeOnInitialPool;
3182  stats.freeBlocks += freeOnInitialPool;
3183 
3184  stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;
3185  stats.queueClassBytes += sizeof(ConcurrentQueue);
3186 
3187  return stats;
3188  }
3189  };
3190 
3191  // For debugging only. Not thread-safe.
3192  MemStats getMemStats()
3193  {
3194  return MemStats::getFor(this);
3195  }
3196  private:
3197  friend struct MemStats;
3198 #endif
3199 
3200 
3202  // Producer list manipulation
3204 
3205  ProducerBase* recycle_or_create_producer(bool isExplicit)
3206  {
3207  bool recycled;
3208  return recycle_or_create_producer(isExplicit, recycled);
3209  }
3210 
// Returns a producer of the requested kind, preferring to re-activate an
// inactive one from the producer list over allocating a new one (producers are
// never removed from the list, only marked inactive). `recycled` tells the
// caller which path was taken. Returns nullptr only on allocation failure.
3211  ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled)
3212  {
3213 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3214  debug::DebugLock lock(implicitProdMutex);
3215 #endif
3216  // Try to re-use one first
3217  for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
3218  if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) {
// CAS inactive true->false claims the producer; losing the race simply moves
// on to the next candidate in the list.
3219  bool expected = true;
3220  if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) {
3221  // We caught one! It's been marked as activated, the caller can have it
3222  recycled = true;
3223  return ptr;
3224  }
3225  }
3226  }
3227 
3228  recycled = false;
3229  return add_producer(isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this)) : create<ImplicitProducer>(this));
3230  }
3231 
// Links a freshly created producer onto the head of the lock-free singly
// linked producer list (producerListTail) via a CAS push. Passes nullptr
// through unchanged so callers can chain this after a failed allocation.
3232  ProducerBase* add_producer(ProducerBase* producer)
3233  {
3234  // Handle failed memory allocation
3235  if (producer == nullptr) {
3236  return nullptr;
3237  }
3238 
3239  producerCount.fetch_add(1, std::memory_order_relaxed);
3240 
3241  // Add it to the lock-free list
// Classic CAS push: on failure compare_exchange_weak refreshes prevTail, so
// producer->next is re-pointed each iteration before retrying. The release
// ordering publishes the fully constructed producer to readers of the list.
3242  auto prevTail = producerListTail.load(std::memory_order_relaxed);
3243  do {
3244  producer->next = prevTail;
3245  } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed));
3246 
3247 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
// Debug builds additionally maintain per-kind lists using the same push idiom.
3248  if (producer->isExplicit) {
3249  auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);
3250  do {
3251  static_cast<ExplicitProducer*>(producer)->nextExplicitProducer = prevTailExplicit;
3252  } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast<ExplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
3253  }
3254  else {
3255  auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed);
3256  do {
3257  static_cast<ImplicitProducer*>(producer)->nextImplicitProducer = prevTailImplicit;
3258  } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast<ImplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
3259  }
3260 #endif
3261 
3262  return producer;
3263  }
3264 
// NOTE(review): original line 3265 (the signature, upstream
// `void reown_producers()`) was dropped by the doxygen extraction -- confirm
// against the original concurrentqueue.h. Called after a move/swap to repoint
// every stolen producer's parent back at this queue instance.
3266  {
3267  // After another instance is moved-into/swapped-with this one, all the
3268  // producers we stole still think their parents are the other queue.
3269  // So fix them up!
3270  for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) {
3271  ptr->parent = this;
3272  }
3273  }
3274 
3275 
3277  // Implicit producer hash
3279 
// Key/value slot of the implicit-producer hash table: thread id -> producer.
// NOTE(review): the doxygen extraction dropped original line 3280 (the struct
// declaration, upstream `struct ImplicitProducerKVP`) as well as the
// signature lines of the move constructor (3287), move assignment (3293) and
// swap (3299) below -- confirm against the original concurrentqueue.h.
3281  {
3282  std::atomic<details::thread_id_t> key;
3283  ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place
3284 
3285  ImplicitProducerKVP() : value(nullptr) { }
3286 
// Move constructor (signature line dropped): copies the atomic key with
// relaxed ordering and takes over the raw value pointer.
3288  {
3289  key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
3290  value = other.value;
3291  }
3292 
// Move assignment (signature line dropped): implemented in terms of swap.
3294  {
3295  swap(other);
3296  return *this;
3297  }
3298 
// swap (signature line dropped): relaxed atomic swap for the key, plain
// std::swap for the non-atomic value; guards against self-swap.
3300  {
3301  if (this != &other) {
3302  details::swap_relaxed(key, other.key);
3303  std::swap(value, other.value);
3304  }
3305  }
3306  };
3307 
3308  template<typename XT, typename XTraits>
// NOTE(review): the doxygen extraction is lossy through this region. Dropped
// lines include 3309 (a friend declaration following the template header
// above), 3311 (`struct ImplicitProducerHash`), and 3314-3315 (the
// `ImplicitProducerKVP* entries;` and `ImplicitProducerHash* prev;` members,
// both of which are clearly used elsewhere in this file) -- confirm against
// the upstream concurrentqueue.h.
3310 
3312  {
3313  size_t capacity;
3316  };
3317 
// populate_initial_implicit_producer_hash() (signature line 3318 dropped):
// sets up the statically-sized initial hash table. Dropped lines 3320 and
// 3325-3327 held the MOODYCAMEL_CONSTEXPR_IF guard for a zero-sized hash and
// the statements binding `hash` to &initialImplicitProducerHash and its
// entries array -- `hash` is otherwise undeclared here, so those lines
// certainly existed upstream.
3319  {
3321  return;
3322  }
3323  else {
3324  implicitProducerHashCount.store(0, std::memory_order_relaxed);
// Every slot starts out with the sentinel invalid_thread_id (empty).
3328  for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
3329  initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
3330  }
3331  hash->prev = nullptr;
3332  implicitProducerHash.store(hash, std::memory_order_relaxed);
3333  }
3334  }
3335 
// swap_implicit_producer_hashes(ConcurrentQueue& other) -- signature line 3336
// was dropped by the doxygen extraction, along with lines 3338 (the
// MOODYCAMEL_CONSTEXPR_IF guard), 3343-3349 (the swap_relaxed/std::swap calls
// that actually exchange the hash state) and 3358 (`hash->prev =
// &initialImplicitProducerHash;`, the mirror of line 3368 below) -- confirm
// against the upstream concurrentqueue.h. After swapping, each queue's hash
// chain may still terminate at the OTHER queue's embedded initial table; the
// two if/else blocks below walk each chain and re-anchor it to the correct
// embedded table.
3337  {
3339  return;
3340  }
3341  else {
3342  // Swap (assumes our implicit producer hash is initialized)
3346 
3348 
3350  if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
3351  implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
3352  }
3353  else {
// Find the link that points at the other queue's initial table, then (on the
// dropped line 3358) repoint it at our own.
3354  ImplicitProducerHash* hash;
3355  for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) {
3356  continue;
3357  }
3359  }
3360  if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) {
3361  other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
3362  }
3363  else {
3364  ImplicitProducerHash* hash;
3365  for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
3366  continue;
3367  }
3368  hash->prev = &other.initialImplicitProducerHash;
3369  }
3370  }
3371  }
3372 
3373  // Only fails (returns nullptr) if memory allocation fails
// get_or_add_implicit_producer() -- the signature (original line 3374,
// upstream `ImplicitProducer* get_or_add_implicit_producer()`) was dropped by
// the doxygen extraction; confirm against the original concurrentqueue.h.
// Maps the calling thread's id to its implicit producer via a lock-free,
// linear-probing hash table that grows by chaining larger tables (newest
// first, linked via `prev`).
3375  {
3376  // Note that since the data is essentially thread-local (key is thread ID),
3377  // there's a reduced need for fences (memory ordering is already consistent
3378  // for any individual thread), except for the current table itself.
3379 
3380  // Start by looking for the thread ID in the current and all previous hash tables.
3381  // If it's not found, it must not be in there yet, since this same thread would
3382  // have added it previously to one of the tables that we traversed.
3383 
3384  // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
3385 
3386 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3387  debug::DebugLock lock(implicitProdMutex);
3388 #endif
3389 
3390  auto id = details::thread_id();
3391  auto hashedId = details::hash_thread_id(id);
3392 
3393  auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
3394  assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null)
3395  for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
3396  // Look for the id in this hash
3397  auto index = hashedId;
3398  while (true) { // Not an infinite loop because at least one slot is free in the hash table
// Capacity is a power of two, so masking implements the modulo of the probe.
3399  index &= hash->capacity - 1;
3400 
3401  auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
3402  if (probedKey == id) {
3403  // Found it! If we had to search several hashes deep, though, we should lazily add it
3404  // to the current main hash table to avoid the extended search next time.
3405  // Note there's guaranteed to be room in the current hash table since every subsequent
3406  // table implicitly reserves space for all previous tables (there's only one
3407  // implicitProducerHashCount).
3408  auto value = hash->entries[index].value;
3409  if (hash != mainHash) {
3410  index = hashedId;
3411  while (true) {
3412  index &= mainHash->capacity - 1;
3413  probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed);
3414  auto empty = details::invalid_thread_id;
3415 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
// With thread-local support, slots of exited threads carry the
// invalid_thread_id2 "reusable" sentinel and may be claimed too.
3416  auto reusable = details::invalid_thread_id2;
3417  if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) ||
3418  (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) {
3419 #else
3420  if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) {
3421 #endif
3422  mainHash->entries[index].value = value;
3423  break;
3424  }
3425  ++index;
3426  }
3427  }
3428 
3429  return value;
3430  }
3431  if (probedKey == details::invalid_thread_id) {
3432  break; // Not in this hash table
3433  }
3434  ++index;
3435  }
3436  }
3437 
3438  // Insert!
3439  auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
3440  while (true) {
// Resize when half full; test_and_set acts as a spin "lock" so only one
// thread allocates the larger table.
3441  // NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
3442  if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) {
3443  // We've acquired the resize lock, try to allocate a bigger hash table.
3444  // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
3445  // we reload implicitProducerHash it must be the most recent version (it only gets changed within this
3446  // locked block).
3447  mainHash = implicitProducerHash.load(std::memory_order_acquire);
3448  if (newCount >= (mainHash->capacity >> 1)) {
3449  auto newCapacity = mainHash->capacity << 1;
3450  while (newCount >= (newCapacity >> 1)) {
3451  newCapacity <<= 1;
3452  }
3453  auto raw = static_cast<char*>((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of<ImplicitProducerKVP>::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity));
3454  if (raw == nullptr) {
3455  // Allocation failed
3456  implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
3457  implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
3458  return nullptr;
3459  }
3460 
// Header and entries share one allocation; entries are placement-new'd just
// past the (aligned) header.
3461  auto newHash = new (raw) ImplicitProducerHash;
3462  newHash->capacity = static_cast<size_t>(newCapacity);
3463  newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
3464  for (size_t i = 0; i != newCapacity; ++i) {
3465  new (newHash->entries + i) ImplicitProducerKVP;
3466  newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
3467  }
3468  newHash->prev = mainHash;
3469  implicitProducerHash.store(newHash, std::memory_order_release);
3470  implicitProducerHashResizeInProgress.clear(std::memory_order_release);
3471  mainHash = newHash;
3472  }
3473  else {
3474  implicitProducerHashResizeInProgress.clear(std::memory_order_release);
3475  }
3476  }
3477 
3478  // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
3479  // to finish being allocated by another thread (and if we just finished allocating above, the condition will
3480  // always be true)
3481  if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {
3482  bool recycled;
3483  auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false, recycled));
3484  if (producer == nullptr) {
3485  implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
3486  return nullptr;
3487  }
// A recycled producer's old hash slot still exists somewhere in the chain,
// so it must not be counted a second time.
3488  if (recycled) {
3489  implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
3490  }
3491 
3492 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3493  producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback;
3494  producer->threadExitListener.userData = producer;
3495  details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
3496 #endif
3497 
3498  auto index = hashedId;
3499  while (true) {
3500  index &= mainHash->capacity - 1;
3501  auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed);
3502 
3503  auto empty = details::invalid_thread_id;
3504 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3505  auto reusable = details::invalid_thread_id2;
3506  if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) ||
3507  (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) {
3508 #else
3509  if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) {
3510 #endif
3511  mainHash->entries[index].value = producer;
3512  break;
3513  }
3514  ++index;
3515  }
3516  return producer;
3517  }
3518 
3519  // Hmm, the old hash is quite full and somebody else is busy allocating a new one.
3520  // We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
3521  // we try to allocate ourselves).
3522  mainHash = implicitProducerHash.load(std::memory_order_acquire);
3523  }
3524  }
3525 
3526 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
// Invoked (via the static callback below) when a thread that owns an implicit
// producer exits: scrubs that thread's id from every table in the hash chain
// and marks the producer inactive so recycle_or_create_producer() can reuse it.
3527  void implicit_producer_thread_exited(ImplicitProducer* producer)
3528  {
3529  // Remove from thread exit listeners
3530  details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener);
3531 
3532  // Remove from hash
3533 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3534  debug::DebugLock lock(implicitProdMutex);
3535 #endif
3536  auto hash = implicitProducerHash.load(std::memory_order_acquire);
3537  assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place
3538  auto id = details::thread_id();
3539  auto hashedId = details::hash_thread_id(id);
3540  details::thread_id_t probedKey;
3541 
3542  // We need to traverse all the hashes just in case other threads aren't on the current one yet and are
3543  // trying to add an entry thinking there's a free slot (because they reused a producer)
3544  for (; hash != nullptr; hash = hash->prev) {
3545  auto index = hashedId;
3546  do {
3547  index &= hash->capacity - 1;
3548  probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
3549  if (probedKey == id) {
// invalid_thread_id2 marks the slot "reusable" (vs. plain empty), which the
// insertion path in get_or_add_implicit_producer explicitly checks for.
3550  hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release);
3551  break;
3552  }
3553  ++index;
3554  } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place
3555  }
3556 
3557  // Mark the queue as being recyclable
3558  producer->inactive.store(true, std::memory_order_release);
3559  }
3560 
// Trampoline registered with ThreadExitNotifier: recovers the producer from
// the userData pointer and forwards to the member function above.
3561  static void implicit_producer_thread_exited_callback(void* userData)
3562  {
3563  auto producer = static_cast<ImplicitProducer*>(userData);
3564  auto queue = producer->parent;
3565  queue->implicit_producer_thread_exited(producer);
3566  }
3567 #endif
3568 
3570  // Utility functions
3572 
// Allocates `size` bytes aligned for TAlign using Traits::malloc. NOTE(review):
// original line 3576 -- the branch condition between the two paths below,
// upstream a MOODYCAMEL_CONSTEXPR_IF comparing TAlign's alignment against
// std::max_align_t's -- was dropped by the doxygen extraction; confirm against
// the original concurrentqueue.h. The else-path over-allocates, stores the raw
// pointer immediately before the aligned user pointer, and returns the latter;
// aligned_free() below reads that stashed pointer back.
3573  template<typename TAlign>
3574  static inline void* aligned_malloc(size_t size)
3575  {
3577  return (Traits::malloc)(size);
3578  else {
3579  size_t alignment = std::alignment_of<TAlign>::value;
3580  void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*));
3581  if (!raw)
3582  return nullptr;
3583  char* ptr = details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*));
3584  *(reinterpret_cast<void**>(ptr) - 1) = raw;
3585  return ptr;
3586  }
3587  }
3588 
// Frees memory obtained from aligned_malloc<TAlign>. NOTE(review): original
// line 3592 -- the branch condition mirroring the one in aligned_malloc above
// -- was dropped by the doxygen extraction; confirm against the original
// concurrentqueue.h. The else-path recovers the raw pointer stashed one
// void* before the aligned pointer; null is passed through harmlessly.
3589  template<typename TAlign>
3590  static inline void aligned_free(void* ptr)
3591  {
3593  return (Traits::free)(ptr);
3594  else
3595  (Traits::free)(ptr ? *(reinterpret_cast<void**>(ptr) - 1) : nullptr);
3596  }
3597 
3598  template<typename U>
3599  static inline U* create_array(size_t count)
3600  {
3601  assert(count > 0);
3602  U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count));
3603  if (p == nullptr)
3604  return nullptr;
3605 
3606  for (size_t i = 0; i != count; ++i)
3607  new (p + i) U();
3608  return p;
3609  }
3610 
3611  template<typename U>
3612  static inline void destroy_array(U* p, size_t count)
3613  {
3614  if (p != nullptr) {
3615  assert(count > 0);
3616  for (size_t i = count; i != 0; )
3617  (p + --i)->~U();
3618  }
3619  aligned_free<U>(p);
3620  }
3621 
3622  template<typename U>
3623  static inline U* create()
3624  {
3625  void* p = aligned_malloc<U>(sizeof(U));
3626  return p != nullptr ? new (p) U : nullptr;
3627  }
3628 
3629  template<typename U, typename A1>
3630  static inline U* create(A1&& a1)
3631  {
3632  void* p = aligned_malloc<U>(sizeof(U));
3633  return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
3634  }
3635 
3636  template<typename U>
3637  static inline void destroy(U* p)
3638  {
3639  if (p != nullptr)
3640  p->~U();
3641  aligned_free<U>(p);
3642  }
3643 
3644 private:
// Head of the lock-free producer list (never shrinks; producers are only
// marked inactive) and the total number ever registered.
3645  std::atomic<ProducerBase*> producerListTail;
3646  std::atomic<std::uint32_t> producerCount;
3647 
// Bump-allocator index into the pre-allocated initial block pool.
// NOTE(review): original lines 3649-3650 were dropped by the doxygen
// extraction; code above references `initialBlockPool` and
// `initialBlockPoolSize`, so those member declarations belong here --
// confirm against the upstream concurrentqueue.h.
3648  std::atomic<size_t> initialBlockPoolIndex;
3651 
3652 #ifndef MCDBGQ_USEDEBUGFREELIST
3653  FreeList<Block> freeList;
3654 #else
3655  debug::DebugFreeList<Block> freeList;
3656 #endif
3657 
// Current (newest) implicit-producer hash table, its logical fill count, and
// the statically embedded initial table + its entry storage.
// NOTE(review): original line 3662 was dropped; get_or_add_implicit_producer
// uses `implicitProducerHashResizeInProgress` (a std::atomic_flag), whose
// declaration belongs here -- confirm against the upstream source.
3658  std::atomic<ImplicitProducerHash*> implicitProducerHash;
3659  std::atomic<size_t> implicitProducerHashCount; // Number of slots logically used
3660  ImplicitProducerHash initialImplicitProducerHash;
3661  std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
3663 
// Round-robin state for explicit consumer tokens.
3664  std::atomic<std::uint32_t> nextExplicitConsumerId;
3665  std::atomic<std::uint32_t> globalExplicitConsumerOffset;
3666 
3667 #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3668  debug::DebugMutex implicitProdMutex;
3669 #endif
3670 
3671 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
3672  std::atomic<ExplicitProducer*> explicitProducers;
3673  std::atomic<ImplicitProducer*> implicitProducers;
3674 #endif
3675 };
3676 
3677 
// NOTE(review): throughout this trailing section the doxygen extraction
// dropped the declarator lines (original lines 3679, 3688, 3697, 3705, 3713,
// 3718, 3723 and 3729), leaving constructor initializer lists and function
// bodies without their signatures. Upstream these are the out-of-line
// definitions of ProducerToken's and ConsumerToken's constructors (one
// overload each for ConcurrentQueue and BlockingConcurrentQueue) and the four
// free swap() helpers -- confirm against the original concurrentqueue.h.
3678 template<typename T, typename Traits>
// ProducerToken(ConcurrentQueue&): claims (or creates) an explicit producer
// and back-links it to this token.
3680  : producer(queue.recycle_or_create_producer(true))
3681 {
3682  if (producer != nullptr) {
3683  producer->token = this;
3684  }
3685 }
3686 
3687 template<typename T, typename Traits>
// ProducerToken(BlockingConcurrentQueue&): same as above; the reinterpret_cast
// reaches the ConcurrentQueue presumably laid out at the start of the blocking
// wrapper -- confirm against the upstream blockingconcurrentqueue.h.
3689  : producer(reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->recycle_or_create_producer(true))
3690 {
3691  if (producer != nullptr) {
3692  producer->token = this;
3693  }
3694 }
3695 
3696 template<typename T, typename Traits>
// ConsumerToken(ConcurrentQueue&): reserves a unique consumer id;
// lastKnownGlobalOffset starts at the sentinel -1 (forces initial rotation).
3698  : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
3699 {
3700  initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
3701  lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
3702 }
3703 
3704 template<typename T, typename Traits>
// ConsumerToken(BlockingConcurrentQueue&): blocking-queue overload of the above.
3706  : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
3707 {
3708  initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
3709  lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
3710 }
3711 
// Free swap() overloads (signature lines dropped): each simply forwards to the
// corresponding member swap of ConcurrentQueue, ProducerToken, ConsumerToken
// and ImplicitProducerKVP respectively.
3712 template<typename T, typename Traits>
3714 {
3715  a.swap(b);
3716 }
3717 
3719 {
3720  a.swap(b);
3721 }
3722 
3724 {
3725  a.swap(b);
3726 }
3727 
3728 template<typename T, typename Traits>
3730 {
3731  a.swap(b);
3732 }
3734 }
3735 
3736 #if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
3737 #pragma warning(pop)
3738 #endif
3739 
3740 #if defined(__GNUC__)
3741 #pragma GCC diagnostic pop
3742 #endif
get_time
static double get_time()
Definition: test_modulation.cc:24
COMPLEX
arma::cx_float COMPLEX
Definition: test_transpose.cc:27
moodycamel::ConcurrentQueue::ImplicitProducer::insert_block_index_entry
bool insert_block_index_entry(BlockIndexEntry *&idxEntry, index_t blockStartIndex)
Definition: concurrentqueue.h:2886
DEFINE_string
DEFINE_string(profile, "random", "The profile of the input user bytes (e.g., 'random', '123')")
stick_this_thread_to_core
int stick_this_thread_to_core(int core_id)
Definition: cpu_attach.cc:10
moodycamel::ConcurrentQueue::ExplicitProducer::enqueue
bool enqueue(U &&element)
Definition: concurrentqueue.h:1834
bench_multiply_dim2
static double bench_multiply_dim2(unsigned Nx, unsigned Ny, unsigned iterations)
Definition: test_matrix.cc:89
PhyStats
Definition: phy_stats.h:15
sqrt
2 sqrt()
Demod64qamHardSse
void Demod64qamHardSse(float *vec_in, uint8_t *vec_out, int num)
Definition: modulation.cc:732
encoder.h
Definitions for Agora's AVX2-based LDPC encoder.
moodycamel::ConcurrentQueue::is_lock_free
static bool is_lock_free()
Definition: concurrentqueue.h:1317
LdpcEncodingInputBufSize
static size_t LdpcEncodingInputBufSize(size_t base_graph, size_t zc)
Definition: utils_ldpc.h:167
moodycamel::ConcurrentQueue::try_enqueue_bulk
bool try_enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count)
Definition: concurrentqueue.h:1092
moodycamel::ConcurrentQueue::aligned_free
static void aligned_free(void *ptr)
Definition: concurrentqueue.h:3590
moodycamel::ConcurrentQueue::ImplicitProducer::dequeue
bool dequeue(U &element)
Definition: concurrentqueue.h:2536
Demod256qamHardLoop
void Demod256qamHardLoop(const float *vec_in, uint8_t *vec_out, int num)
Definition: modulation.cc:1269
run_benchmark_ZF
static void run_benchmark_ZF(unsigned Nx, unsigned Ny, unsigned iterations)
Definition: test_matrix.cc:131
udp_comm.h
Declaration file for the UDPComm class. This class is used to send messages and receive messages from...
moodycamel::ConcurrentQueue::ImplicitProducer::BlockIndexHeader::capacity
size_t capacity
Definition: concurrentqueue.h:2878
freq_ghz
double freq_ghz
Definition: bench.cc:10
run_benchmark_precode
static void run_benchmark_precode(unsigned Nx, unsigned Ny, unsigned iterations)
Definition: test_matrix.cc:189
kNumCodeBlocks
static constexpr size_t kNumCodeBlocks
Definition: test_ldpc.cc:22
bench_ZF_warmup
static double bench_ZF_warmup(unsigned Nx, unsigned Ny, unsigned iterations)
Definition: test_matrix.cc:29
moodycamel::ConcurrentQueue::ImplicitProducer
Definition: concurrentqueue.h:2406
moodycamel::ConcurrentQueueDefaultTraits::IMPLICIT_INITIAL_INDEX_SIZE
static const size_t IMPLICIT_INITIAL_INDEX_SIZE
Definition: concurrentqueue.h:357
moodycamel::ProducerToken
Definition: concurrentqueue.h:630
moodycamel::details::_hash_32_or_64
Definition: concurrentqueue.h:435
moodycamel::ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
Definition: concurrentqueue.h:759
moodycamel::details::thread_id
thread_id_t thread_id()
Definition: concurrentqueue.h:157
flushCache
void flushCache()
Definition: test_matrix.cc:18
moodycamel::ConcurrentQueue::populate_initial_block_list
void populate_initial_block_list(size_t blockCount)
Definition: concurrentqueue.h:3022
run_benchmark_multiply
static void run_benchmark_multiply(int dim, unsigned Nx, unsigned Ny, unsigned iterations)
Definition: test_matrix.cc:160
moodycamel::ConcurrentQueue::ImplicitProducer::blockIndex
std::atomic< BlockIndexHeader * > blockIndex
Definition: concurrentqueue.h:2995
moodycamel::ConcurrentQueue::Block::freeListRefs
std::atomic< std::uint32_t > freeListRefs
Definition: concurrentqueue.h:1656
moodycamel::ConcurrentQueue::Block::reset_empty
void reset_empty()
Definition: concurrentqueue.h:1631
Table::Calloc
void Calloc(size_t dim1, size_t dim2, Agora_memory::Alignment_t alignment)
Definition: memory_manage.h:45
complex_float
Definition: test_transpose.cc:22
moodycamel::ProducerToken::operator=
ProducerToken & operator=(ProducerToken &&other) noexcept
Definition: concurrentqueue.h:647
moodycamel::ConcurrentQueue::operator=
ConcurrentQueue & operator=(ConcurrentQueue &&other) noexcept
Definition: concurrentqueue.h:924
PinToCoreWithOffset
void PinToCoreWithOffset(ThreadType thread_type, size_t core_offset, size_t thread_id, bool allow_reuse, bool verbose)
Definition: utils.cc:157
mm_gui.w
w
Definition: mm_gui.py:119
LDPCconfig::MaxDecoderIter
int16_t MaxDecoderIter() const
Definition: ldpc_config.h:48
run_benchmark_256qam
static void run_benchmark_256qam(unsigned iterations, unsigned mode)
Definition: test_modulation.cc:449
AdaptBitsFromMod
static void AdaptBitsFromMod(const uint8_t *vec_in, uint8_t *vec_out, int len, int mod_type)
Definition: utils_ldpc.h:86
moodycamel::ConcurrentQueue::ImplicitProducerKVP::key
std::atomic< details::thread_id_t > key
Definition: concurrentqueue.h:3282
moodycamel::ConcurrentQueue::ExplicitProducer
Definition: concurrentqueue.h:1749
values
Insert the data and pilot values
Definition: generate_data.m:83
moodycamel::ConcurrentQueue::swap
void swap(ConcurrentQueue &other) noexcept
Definition: concurrentqueue.h:934
size
end IFFT Reshape the symbol vector into two different spatial streams size
Definition: generate_data.m:73
fmt::v8::printf
auto printf(const S &fmt, const T &... args) -> int
Definition: printf.h:631
datatype_conversion.h
DataGenerator::GenRawData
void GenRawData(Direction dir, std::vector< int8_t > &information, size_t ue_id)
Generate one raw information bit sequence.
Definition: data_generator.h:65
moodycamel::ConcurrentQueue::ImplicitProducerHash
Definition: concurrentqueue.h:3311
UDPComm
Definition: udp_comm.h:18
moodycamel::ConcurrentQueue::destroy
static void destroy(U *p)
Definition: concurrentqueue.h:3637
LDPCconfig::EarlyTermination
bool EarlyTermination() const
Definition: ldpc_config.h:49
LdpcEncodeHelper
static void LdpcEncodeHelper(size_t base_graph, size_t zc, size_t nRows, int8_t *encoded_buffer, int8_t *parity_buffer, const int8_t *input_buffer)
Definition: utils_ldpc.h:195
ConvertShortToFloat
static void ConvertShortToFloat(const short *in_buf, float *out_buf, size_t n_elems)
Produces outputs -1->+0.999.
Definition: datatype_conversion.h:41
moodycamel::ConcurrentQueue::ExplicitProducer::BlockIndexHeader
Definition: concurrentqueue.h:2332
Config::CoreOffset
size_t CoreOffset() const
Definition: config.h:180
moodycamel::ConcurrentQueue::ExplicitProducer
friend struct ExplicitProducer
Definition: concurrentqueue.h:1332
moodycamel::details::static_is_lock_free_num::value
@ value
Definition: concurrentqueue.h:618
DataGenerator::Profile::kProfile123
@ kProfile123
run_benchmark_data_type
static void run_benchmark_data_type(unsigned N, unsigned iterations)
Definition: test_fft_mkl.cc:401
src_port
uint16_t src_port
Definition: eth_common.h:60
moodycamel::details::thread_id_t
std::uintptr_t thread_id_t
Definition: concurrentqueue.h:154
moodycamel::ConcurrentQueue::MAX_SUBQUEUE_SIZE
static const size_t MAX_SUBQUEUE_SIZE
Definition: concurrentqueue.h:766
moodycamel::ConcurrentQueue::ExplicitProducer::dequeue_bulk
size_t dequeue_bulk(It &itemFirst, size_t max)
Definition: concurrentqueue.h:2233
bench_mod_256qam
static double bench_mod_256qam(unsigned iterations, unsigned mode)
Definition: test_modulation.cc:271
UDPServer::Connect
ssize_t Connect(const std::string &remote_address, const std::string &remote_port)
The remote_address | remote_port is the only address to which datagrams are received....
Definition: udp_server.h:39
MOODYCAMEL_THREADLOCAL
#define MOODYCAMEL_THREADLOCAL
Definition: concurrentqueue.h:151
moodycamel::ConcurrentQueue::try_dequeue_bulk_from_producer
size_t try_dequeue_bulk_from_producer(producer_token_t const &producer, It itemFirst, size_t max)
Definition: concurrentqueue.h:1292
moodycamel::ConcurrentQueue::initialImplicitProducerHashEntries
std::array< ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE > initialImplicitProducerHashEntries
Definition: concurrentqueue.h:3661
moodycamel::ConcurrentQueue::ImplicitProducerKVP::swap
void swap(ImplicitProducerKVP &other) noexcept
Definition: concurrentqueue.h:3299
GetTime::CyclesToNs
static double CyclesToNs(size_t cycles, double freq_ghz)
Definition: gettime.h:103
run_benchmark_64qam
static void run_benchmark_64qam(unsigned iterations, unsigned mode)
Definition: test_modulation.cc:444
moodycamel::ConcurrentQueue::try_dequeue_bulk
size_t try_dequeue_bulk(It itemFirst, size_t max)
Definition: concurrentqueue.h:1210
Direction
Direction
Definition: symbols.h:39
moodycamel::details::thread_id_converter
Definition: concurrentqueue.h:83
Demod64qamHardAvx2
void Demod64qamHardAvx2(float *vec_in, uint8_t *vec_out, int num)
Definition: modulation.cc:921
LOOP_NUM
#define LOOP_NUM
Definition: test_transpose.cc:20
moodycamel::ConcurrentQueue::size_approx
size_t size_approx() const
Definition: concurrentqueue.h:1304
moodycamel::ConsumerToken::initialOffset
std::uint32_t initialOffset
Definition: concurrentqueue.h:732
WorkerToMasterMaster
void WorkerToMasterMaster(moodycamel::ConcurrentQueue< ItemT > *queue)
Definition: test_concurrent_queue.cc:65
Stats
Definition: stats.h:63
moodycamel::details::identity
Definition: concurrentqueue.h:254
moodycamel::ConcurrentQueueDefaultTraits::BLOCK_SIZE
static const size_t BLOCK_SIZE
Definition: concurrentqueue.h:342
kSIMDTestNum
static constexpr size_t kSIMDTestNum
Definition: test_datatype_conversion.cc:13
Demod64qamSoftLoop
void Demod64qamSoftLoop(const float *vec_in, int8_t *llr, int num)
Definition: modulation_srslte.cc:117
SimdConvertFloat16ToFloat32
static void SimdConvertFloat16ToFloat32(float *out_buf, const float *in_buf, size_t n_elems)
Definition: datatype_conversion.h:547
fmt::v8::detail::digits::error
@ error
Definition: format-inl.h:643
main
int main(int argc, char *argv[])
Definition: test_modulation.cc:455
hammingdist
int hammingdist(uint8_t x, uint8_t y)
Definition: test_modulation.cc:249
moodycamel::ConcurrentQueue::FreeList::FreeList
FreeList()
Definition: concurrentqueue.h:1431
moodycamel::ConcurrentQueue< EventData >::InnerQueueContext
InnerQueueContext
Definition: concurrentqueue.h:1536
kCol2s
static constexpr size_t kCol2s
Definition: test_ptr_grid.cc:7
LdpcNumEncodedBits
static size_t LdpcNumEncodedBits(size_t base_graph, size_t zc, size_t nRows)
Definition: utils_ldpc.h:159
Catch::Generators::value
GeneratorWrapper< T > value(T &&value)
Definition: catch.hpp:3999
moodycamel::ConcurrentQueue::add_blocks_to_free_list
void add_blocks_to_free_list(Block *block)
Definition: concurrentqueue.h:3058
moodycamel::ConcurrentQueue::destroy_array
static void destroy_array(U *p, size_t count)
Definition: concurrentqueue.h:3612
fmt::v8::align::left
@ left
Definition: core.h:2021
moodycamel::ConcurrentQueue::ExplicitProducer::BlockIndexHeader::size
size_t size
Definition: concurrentqueue.h:2334
kFrameWnd
static constexpr size_t kFrameWnd
Definition: symbols.h:18
kCodeRate
static constexpr double kCodeRate[kModTestNum]
Definition: test_demul_threaded.cc:18
fmt::v8::detail::uintptr_t
fallback_uintptr uintptr_t
Definition: format.h:332
unused
#define unused(x)
Definition: utils.h:14
moodycamel::ConsumerToken::desiredProducer
details::ConcurrentQueueProducerTypelessBase * desiredProducer
Definition: concurrentqueue.h:736
gen_tag_t::FrmSymSc
static gen_tag_t FrmSymSc(size_t frame_id, size_t symbol_id, size_t sc_id)
Definition: message.h:95
kBaseGraph
static constexpr size_t kBaseGraph
Definition: test_ldpc.cc:23
modulation.h
moodycamel::ConsumerToken::swap
void swap(ConsumerToken &other) noexcept
Definition: concurrentqueue.h:714
TEST
TEST(TestConcurrentQueue, MasterToWorkerStatic)
Definition: test_concurrent_queue.cc:41
moodycamel::ConcurrentQueue::create_array
static U * create_array(size_t count)
Definition: concurrentqueue.h:3599
ItemT
Definition: test_concurrent_queue.cc:10
Agora_memory::PaddedAlignedAlloc
void * PaddedAlignedAlloc(Alignment_t alignment, size_t size)
Definition: memory_manage.cc:15
GetTime::GetTimeUs
static double GetTimeUs()
Definition: gettime.h:14
moodycamel::ConcurrentQueue::FreeList::REFS_MASK
static const std::uint32_t REFS_MASK
Definition: concurrentqueue.h:1523
moodycamel::ConcurrentQueue::add_producer
ProducerBase * add_producer(ProducerBase *producer)
Definition: concurrentqueue.h:3232
mm_gui.mode
string mode
Definition: mm_gui.py:105
moodycamel::ConcurrentQueue::ProducerBase::tailIndex
std::atomic< index_t > tailIndex
Definition: concurrentqueue.h:1726
Demod256qamSoftSse
void Demod256qamSoftSse(const float *vec_in, int8_t *llr, int num)
Definition: modulation.cc:2201
bench_multiply_transpose
static double bench_multiply_transpose(unsigned Nx, unsigned Ny, unsigned iterations)
Definition: test_matrix.cc:111
LDPCconfig::NumRows
size_t NumRows() const
Definition: ldpc_config.h:52
moodycamel::ConcurrentQueue::ImplicitProducerHash::entries
ImplicitProducerKVP * entries
Definition: concurrentqueue.h:3314
p
for p
Definition: process_rx_frame.m:36
flushCache
int flushCache()
Definition: test_mufft.cc:16
main
int main(int argc, char *argv[])
Definition: test_matrix.cc:228
moodycamel::ConcurrentQueue< EventData >::AllocationMode
AllocationMode
Definition: concurrentqueue.h:1338
moodycamel::details::swap_relaxed
static void swap_relaxed(std::atomic< T > &left, std::atomic< T > &right)
Definition: concurrentqueue.h:507
moodycamel::details::ConcurrentQueueProducerTypelessBase::inactive
std::atomic< bool > inactive
Definition: concurrentqueue.h:426
kNumFillerBits
static constexpr size_t kNumFillerBits
Definition: test_ldpc.cc:25
memory_manage.h
Demod256qamHardSse
void Demod256qamHardSse(float *vec_in, uint8_t *vec_out, int num)
Definition: modulation.cc:1329
bench_ZF
static double bench_ZF(unsigned Nx, unsigned Ny, unsigned iterations)
Definition: test_matrix.cc:47
moodycamel::ConcurrentQueue::ImplicitProducer::~ImplicitProducer
~ImplicitProducer()
Definition: concurrentqueue.h:2416
moodycamel::ConcurrentQueue::consumer_token_t
::moodycamel::ConsumerToken consumer_token_t
Definition: concurrentqueue.h:750
moodycamel::ConcurrentQueue::Block::set_all_empty
void set_all_empty()
Definition: concurrentqueue.h:1616
moodycamel::ConcurrentQueue::FreeListNode::freeListRefs
std::atomic< std::uint32_t > freeListRefs
Definition: concurrentqueue.h:1421
moodycamel::ConcurrentQueue::aligned_malloc
static void * aligned_malloc(size_t size)
Definition: concurrentqueue.h:3574
moodycamel::ConcurrentQueue::BLOCK_SIZE
static const size_t BLOCK_SIZE
Definition: concurrentqueue.h:755
fmt::v8::align::right
@ right
Definition: core.h:2021
moodycamel::ConcurrentQueueDefaultTraits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE
static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE
Definition: concurrentqueue.h:368
moodycamel::details::max_align_t::z
void * z
Definition: concurrentqueue.h:310
Config::BsAntNum
size_t BsAntNum() const
Definition: config.h:35
moodycamel::ConcurrentQueue::create
static U * create()
Definition: concurrentqueue.h:3623
kIpv6Address
static const std::string kIpv6Address
Definition: test_udp_client_server.cc:20
BitsToBytes
static size_t BitsToBytes(size_t n_bits)
Definition: utils_ldpc.h:124
moodycamel::ConcurrentQueue::Block::freeListNext
std::atomic< Block * > freeListNext
Definition: concurrentqueue.h:1657
UDPServer
Basic UDP server class based on OS sockets that supports receiving messages.
Definition: udp_server.h:13
K
#define K
Definition: test_transpose.cc:19
MasterToWorkerDynamicWorker
void MasterToWorkerDynamicWorker(Config *cfg, size_t worker_id, moodycamel::ConcurrentQueue< EventData > &event_queue, moodycamel::ConcurrentQueue< EventData > &complete_task_queue, moodycamel::ProducerToken *ptok, Table< complex_float > &data_buffer, PtrGrid< kFrameWnd, kMaxDataSCs, complex_float > &ul_beam_matrices, Table< complex_float > &ue_spec_pilot_buffer, Table< complex_float > &equal_buffer, PtrCube< kFrameWnd, kMaxSymbols, kMaxUEs, int8_t > &demod_buffers_, PhyStats *phy_stats, Stats *stats)
Definition: test_demul_threaded.cc:64
MOODYCAMEL_DELETE_FUNCTION
#define MOODYCAMEL_DELETE_FUNCTION
Definition: concurrentqueue.h:232
udp_server.h
Provides the UDPServer functions from the UDPComm class. Receiver only support.
y
y
Definition: simulate_performance.m:74
TEST
TEST(TestPtrGrid, Basic)
Definition: test_ptr_grid.cc:10
moodycamel::ConcurrentQueue::globalExplicitConsumerOffset
std::atomic< std::uint32_t > globalExplicitConsumerOffset
Definition: concurrentqueue.h:3665
moodycamel::ConcurrentQueue::FreeList::SHOULD_BE_ON_FREELIST
static const std::uint32_t SHOULD_BE_ON_FREELIST
Definition: concurrentqueue.h:1524
moodycamel::ConcurrentQueueDefaultTraits::free
static void free(void *ptr)
Definition: concurrentqueue.h:395
moodycamel::ConcurrentQueue::try_enqueue
bool try_enqueue(producer_token_t const &token, T &&item)
Definition: concurrentqueue.h:1067
moodycamel::ConcurrentQueue::ProducerBase::isExplicit
bool isExplicit
Definition: concurrentqueue.h:1735
moodycamel::ConcurrentQueueDefaultTraits::malloc
static void * malloc(size_t size)
Definition: concurrentqueue.h:394
kPrintUplinkInformationBytes
static constexpr bool kPrintUplinkInformationBytes
Definition: test_ldpc_mod.cc:29
Demod16qamHardLoop
void Demod16qamHardLoop(const float *vec_in, uint8_t *vec_out, int num)
Definition: modulation.cc:322
fclose
fclose(fileID)
UDPSend
void UDPSend(const std::string &local_address, uint16_t local_port, const std::string &remote_address, uint16_t remote_port)
Test the connection use case.
Definition: test_udp_client_server.cc:61
Demod64qamHardLoop
void Demod64qamHardLoop(const float *vec_in, uint8_t *vec_out, int num)
Definition: modulation.cc:699
demod_16qam_loop2
static void demod_16qam_loop2(float *vec_in, uint8_t *vec_out, int ue_num)
Definition: test_fft_mkl.cc:261
TEST
TEST(TestZF, Perf)
Measure performance of zeroforcing.
Definition: test_zf.cc:10
threads
Pilot RX by socket threads(=reference time)
EventData::tags_
std::array< size_t, kMaxTags > tags_
Definition: message.h:146
moodycamel::ConcurrentQueue::implicit_context
@ implicit_context
Definition: concurrentqueue.h:1536
std::swap
void swap(nlohmann::basic_json< ObjectType, ArrayType, StringType, BooleanType, NumberIntegerType, NumberUnsignedType, NumberFloatType, AllocatorType, JSONSerializer, BinaryType > &j1, nlohmann::basic_json< ObjectType, ArrayType, StringType, BooleanType, NumberIntegerType, NumberUnsignedType, NumberFloatType, AllocatorType, JSONSerializer, BinaryType > &j2) noexcept(//NOLINT(readability-inconsistent-declaration-parameter-name) is_nothrow_move_constructible< nlohmann::basic_json< ObjectType, ArrayType, StringType, BooleanType, NumberIntegerType, NumberUnsignedType, NumberFloatType, AllocatorType, JSONSerializer, BinaryType > >::value &&//NOLINT(misc-redundant-expression) is_nothrow_move_assignable< nlohmann::basic_json< ObjectType, ArrayType, StringType, BooleanType, NumberIntegerType, NumberUnsignedType, NumberFloatType, AllocatorType, JSONSerializer, BinaryType > >::value)
exchanges the values of two JSON objects
Definition: json.hpp:24114
spdlog::details::os::now
SPDLOG_INLINE spdlog::log_clock::time_point now() SPDLOG_NOEXCEPT
Definition: os-inl.h:71
main
int main(int argc, char **argv)
Definition: test_zf_threaded.cc:169
moodycamel::ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE
static const size_t EXPLICIT_INITIAL_INDEX_SIZE
Definition: concurrentqueue.h:757
moodycamel::ConcurrentQueueDefaultTraits::index_t
std::size_t index_t
Definition: concurrentqueue.h:335
main
int main()
Definition: test_ldpc.cc:30
moodycamel::ConcurrentQueue::CannotAlloc
@ CannotAlloc
Definition: concurrentqueue.h:1338
moodycamel::ConcurrentQueue::implicitProducerHash
std::atomic< ImplicitProducerHash * > implicitProducerHash
Definition: concurrentqueue.h:3658
TOSTRING
#define TOSTRING(x)
Definition: symbols.h:14
flushCacheRuntime
int flushCacheRuntime(long *p, long long p_size)
Definition: test_fft_mkl.cc:33
moodycamel::ConcurrentQueue::enqueue_bulk
bool enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count)
Definition: concurrentqueue.h:1029
moodycamel::ConcurrentQueue::ImplicitProducer
friend struct ImplicitProducer
Definition: concurrentqueue.h:1334
moodycamel::ConcurrentQueue::recycle_or_create_producer
ProducerBase * recycle_or_create_producer(bool isExplicit)
Definition: concurrentqueue.h:3205
moodycamel::ProducerToken::producer
details::ConcurrentQueueProducerTypelessBase * producer
Definition: concurrentqueue.h:691
moodycamel::ConcurrentQueue::inner_enqueue_bulk
bool inner_enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count)
Definition: concurrentqueue.h:1359
SimdAlignCxFltVector
std::vector< std::complex< float >, boost::alignment::aligned_allocator< std::complex< float >, kSimdAlignment > > SimdAlignCxFltVector
Definition: simd_types.h:30
x
x
Definition: simulate_performance.m:69
test_get_time
static double test_get_time(void)
Definition: test_matrix.cc:12
moodycamel::ConcurrentQueue::ExplicitProducer::pr_blockIndexFront
size_t pr_blockIndexFront
Definition: concurrentqueue.h:2386
bench_fft_1d_0
static double bench_fft_1d_0(unsigned N, unsigned iterations, int direction)
Definition: test_mufft.cc:55
MOODYCAMEL_NOEXCEPT
#define MOODYCAMEL_NOEXCEPT
Definition: concurrentqueue.h:206
ItemT::ItemT
ItemT(size_t value)
Definition: test_concurrent_queue.cc:16
Direction::kUplink
@ kUplink
moodycamel::details::max_align_t
Definition: concurrentqueue.h:307
Config::Frame
const FrameStats & Frame() const
Definition: config.h:340
main
int main(int argc, char **argv)
Definition: test_zf.cc:72
count
count
Definition: inspect_agora_results.m:96
moodycamel::details::invalid_thread_id
static const thread_id_t invalid_thread_id
Definition: concurrentqueue.h:155
kVerbose
static constexpr bool kVerbose
Definition: test_ldpc_baseband.cc:26
moodycamel::ConcurrentQueue::try_enqueue
bool try_enqueue(T &&item)
Definition: concurrentqueue.h:1050
moodycamel::ConcurrentQueue::ProducerBase::getTail
index_t getTail() const
Definition: concurrentqueue.h:1724
moodycamel::ConcurrentQueue::Block::next
Block * next
Definition: concurrentqueue.h:1652
moodycamel::ConcurrentQueue::ExplicitProducer::BlockIndexEntry::block
Block * block
Definition: concurrentqueue.h:2329
temp
temp
Definition: parse_dl_file.m:5
kNumWorkers
static constexpr size_t kNumWorkers
Definition: test_concurrent_queue.cc:7
kAntTestNum
static constexpr size_t kAntTestNum
Definition: test_zf_threaded.cc:14
moodycamel::ConcurrentQueue::ProducerBase::dequeue_bulk
size_t dequeue_bulk(It &itemFirst, size_t max)
Definition: concurrentqueue.h:1705
GetTime::CyclesToUs
static double CyclesToUs(size_t cycles, double freq_ghz)
Definition: gettime.h:97
ThreadType::kWorker
@ kWorker
kEnableEarlyTermination
static constexpr bool kEnableEarlyTermination
Definition: test_ldpc.cc:24
Demod64qamSoftSse
void Demod64qamSoftSse(float *vec_in, int8_t *llr, int num)
Definition: modulation_srslte.cc:131
moodycamel::ConcurrentQueue::ImplicitProducer::BlockIndexEntry::value
std::atomic< Block * > value
Definition: concurrentqueue.h:2873
moodycamel::ConcurrentQueue::initialBlockPool
Block * initialBlockPool
Definition: concurrentqueue.h:3649
moodycamel::ConcurrentQueue::explicit_context
@ explicit_context
Definition: concurrentqueue.h:1536
moodycamel::ProducerToken::valid
bool valid() const
Definition: concurrentqueue.h:672
DataGenerator::GenCodeblock
void GenCodeblock(Direction dir, const int8_t *input_ptr, std::vector< int8_t > &encoded_codeword)
Generate the encoded bit sequence for one code block for the active LDPC configuration from the input...
Definition: data_generator.h:87
moodycamel::details::thread_id_converter::thread_id_hash_t
thread_id_t thread_id_hash_t
Definition: concurrentqueue.h:85
LdpcEncodingEncodedBufSize
static size_t LdpcEncodingEncodedBufSize(size_t base_graph, size_t zc)
Definition: utils_ldpc.h:181
moodycamel::ConcurrentQueue::ImplicitProducer::BlockIndexHeader
Definition: concurrentqueue.h:2876
moodycamel::ConcurrentQueue::ProducerBase::parent
ConcurrentQueue * parent
Definition: concurrentqueue.h:1736
DataGenerator::BinForIfft
std::vector< complex_float > BinForIfft(const std::vector< complex_float > &modulated_codeword) const
An array with OfdmDataNum() elements with the OfdmDataNum() modulated elements binned at the center.
Definition: data_generator.h:169
moodycamel::ConcurrentQueue::try_dequeue_bulk
size_t try_dequeue_bulk(consumer_token_t &token, It itemFirst, size_t max)
Definition: concurrentqueue.h:1228
moodycamel::ConcurrentQueue::update_current_producer_after_rotation
bool update_current_producer_after_rotation(consumer_token_t &token)
Definition: concurrentqueue.h:1371
AllocBuffer1d
static void AllocBuffer1d(T **buffer, U dim, Agora_memory::Alignment_t alignment, int init_zero)
Definition: memory_manage.h:105
Demod16qamHardSse
void Demod16qamHardSse(float *vec_in, uint8_t *vec_out, int num)
Definition: modulation.cc:345
MOODYCAMEL_CONSTEXPR_IF
#define MOODYCAMEL_CONSTEXPR_IF
Definition: concurrentqueue.h:167
Table< complex_float >
moodycamel::ConcurrentQueue::FreeList::operator=
FreeList & operator=(FreeList const &)=delete
MOODYCAMEL_ALIGNED_TYPE_LIKE
#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj)
Definition: concurrentqueue.h:257
moodycamel::ConcurrentQueue::swap_implicit_producer_hashes
void swap_implicit_producer_hashes(ConcurrentQueue &other)
Definition: concurrentqueue.h:3336
cpu_attach.h
Declaration file for cpu attachment helper functions.
main
int main(int argc, char *argv[])
Definition: test_mufft.cc:123
index
index
Definition: parse_all_dl.m:11
main
int main(int argc, char **argv)
Definition: test_ptr_grid.cc:48
WorkerToMasterWorkerWithToken
void WorkerToMasterWorkerWithToken(size_t worker_id, moodycamel::ConcurrentQueue< ItemT > *queue, moodycamel::ProducerToken *ptok)
Definition: test_concurrent_queue.cc:75
moodycamel::ConcurrentQueue::inner_enqueue
bool inner_enqueue(producer_token_t const &token, U &&element)
Definition: concurrentqueue.h:1346
TEST
TEST(TestDemul, VaryingConfig)
Definition: test_demul_threaded.cc:117
main
int main(int argc, char **argv)
Definition: test_armadillo.cc:80
T
T
Definition: simulate_performance.m:4
moodycamel::ConcurrentQueue::ProducerBase::dequeueOvercommit
std::atomic< index_t > dequeueOvercommit
Definition: concurrentqueue.h:1730
EventType::kBeam
@ kBeam
UDPClient::Connect
ssize_t Connect(const std::string &remote_address, uint16_t remote_port)
The remote_address | remote_port is the address to which datagrams are sent. 1:1 association me->remo...
Definition: udp_client.h:34
run_benchmark_1d
static void run_benchmark_1d(unsigned N, unsigned iterations)
Definition: test_mufft.cc:90
snr
snr
Definition: inspect_agora_results.m:118
moodycamel::ConcurrentQueue::Block::emptyFlags
std::atomic< bool > emptyFlags[BLOCK_SIZE<=EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE :1]
Definition: concurrentqueue.h:1654
gen_tag_t
Definition: message.h:22
InitModulationTable
void InitModulationTable(Table< complex_float > &mod_table, size_t mod_order)
Definition: modulation.cc:42
moodycamel::ConcurrentQueue::ProducerBase
Definition: concurrentqueue.h:1678
filename
filename
Definition: parse_all_dl.m:14
moodycamel::ConcurrentQueue::ExplicitProducer::blockIndex
std::atomic< BlockIndexHeader * > blockIndex
Definition: concurrentqueue.h:2381
fmt::v8::detail::abs
constexpr std::chrono::duration< Rep, Period > abs(std::chrono::duration< Rep, Period > d)
Definition: chrono.h:1488
moodycamel::ConcurrentQueue::try_enqueue
bool try_enqueue(producer_token_t const &token, T const &item)
Definition: concurrentqueue.h:1059
moodycamel::ConsumerToken::ConcurrentQueueTests
friend class ConcurrentQueueTests
Definition: concurrentqueue.h:729
MOODYCAMEL_NO_TSAN
#define MOODYCAMEL_NO_TSAN
Definition: concurrentqueue.h:266
Config::MCSParams
const nlohmann::json & MCSParams(Direction dir) const
Definition: config.h:288
Config::OfdmDataNum
size_t OfdmDataNum() const
Definition: config.h:47
TryEnqueueFallback
static void TryEnqueueFallback(moodycamel::ConcurrentQueue< EventData > *mc_queue, moodycamel::ProducerToken *producer_token, const EventData &event)
Definition: concurrent_queue_wrapper.h:18
fmt::v8::runtime
auto runtime(const S &s) -> basic_runtime< char_t< S >>
Definition: core.h:3098
main
int main(int argc, char *argv[])
Definition: test_fft_mkl.cc:467
ItemT::value_
size_t value_
Definition: test_concurrent_queue.cc:11
moodycamel::ConcurrentQueue::ProducerBase::next_prod
ProducerBase * next_prod() const
Definition: concurrentqueue.h:1715
Demod64qamSoftAvx2
void Demod64qamSoftAvx2(float *vec_in, int8_t *llr, int num)
Definition: modulation.cc:1103
flushCache
void flushCache()
Definition: test_modulation.cc:14
moodycamel::details::static_is_lock_free_num
Definition: concurrentqueue.h:618
kSendPort
static constexpr size_t kSendPort
Definition: test_udp_client_server.cc:13
SimdConvertShortToFloat
static void SimdConvertShortToFloat(const short *in_buf, float *out_buf, size_t n_elems)
Definition: datatype_conversion.h:126
moodycamel::ConcurrentQueue::freeList
FreeList< Block > freeList
Definition: concurrentqueue.h:3653
kNumPackets
static constexpr size_t kNumPackets
Definition: test_udp_client_server.cc:16
RandFloat
float RandFloat(float min, float max)
Definition: test_ldpc_mod.cc:42
bench_demod
static double bench_demod(unsigned N, unsigned iterations)
Definition: test_fft_mkl.cc:279
Demod256qamHardAvx2
void Demod256qamHardAvx2(float *vec_in, uint8_t *vec_out, int num)
Definition: modulation.cc:1569
data_generator.h
Implementation file for the Data generator class to generate binary files as inputs to Agora,...
EventData
Definition: message.h:142
MasterToWorkerStaticMaster
void MasterToWorkerStaticMaster(moodycamel::ConcurrentQueue< ItemT > *queue, moodycamel::ProducerToken **ptoks)
Definition: test_concurrent_queue.cc:21
fmt::v8::detail::num_bits
constexpr auto num_bits() -> int
Definition: format.h:343
moodycamel::ConcurrentQueue::populate_initial_implicit_producer_hash
void populate_initial_implicit_producer_hash()
Definition: concurrentqueue.h:3318
PtrGrid< kFrameWnd, kMaxDataSCs, complex_float >
moodycamel::ConcurrentQueueDefaultTraits
Definition: concurrentqueue.h:320
kMaxDecoderIters
static constexpr size_t kMaxDecoderIters
Definition: test_ldpc.cc:26
moodycamel::ConcurrentQueue::ExplicitProducer::BlockIndexHeader::entries
BlockIndexEntry * entries
Definition: concurrentqueue.h:2336
BS_ANT
#define BS_ANT
Definition: test_transpose.cc:18
moodycamel::details::static_is_lock_free
Definition: concurrentqueue.h:624
Demod16qamSoftSse
void Demod16qamSoftSse(float *vec_in, int8_t *llr, int num)
Definition: modulation_srslte.cc:36
main
int main(int argc, char **argv)
Definition: test_avx512_complex_mul.cc:107
run_benchmark_1d
static void run_benchmark_1d(unsigned N, unsigned iterations)
Definition: test_fft_mkl.cc:345
plot_csv.row
list row
Definition: plot_csv.py:11
DataGenerator::Profile::kRandom
@ kRandom
Demod16qamSoftLoop
void Demod16qamSoftLoop(const float *vec_in, int8_t *llr, int num)
Definition: modulation_srslte.cc:24
moodycamel
Definition: concurrentqueue.h:82
moodycamel::ConcurrentQueue::operator=
ConcurrentQueue & operator=(ConcurrentQueue const &)=delete
bench_data_type_convert
static double bench_data_type_convert(unsigned N, unsigned iterations)
Definition: test_fft_mkl.cc:142
simd_types.h
Aligned types for SIMD compatibility.
ItemT::padding_
size_t padding_[7]
Definition: test_concurrent_queue.cc:12
fmt::v8::ptr
auto ptr(T p) -> const void *
Definition: format.h:2680
moodycamel::details::const_numeric_max
Definition: concurrentqueue.h:292
message.h
Self defined functions for message storage and passing.
moodycamel::ConcurrentQueue::inner_enqueue
bool inner_enqueue(U &&element)
Definition: concurrentqueue.h:1352
moodycamel::details::thread_id_converter::prehash
static thread_id_hash_t prehash(thread_id_t const &x)
Definition: concurrentqueue.h:86
Table::Free
void Free()
Definition: memory_manage.h:84
moodycamel::ConcurrentQueue::ImplicitProducer::dequeue_bulk
size_t dequeue_bulk(It &itemFirst, size_t max)
Definition: concurrentqueue.h:2765
moodycamel::ConcurrentQueue::ExplicitProducer::BlockIndexEntry
Definition: concurrentqueue.h:2326
moodycamel::ConcurrentQueue::Block::operator[]
T const * operator[](index_t idx) const noexcept
Definition: concurrentqueue.h:1646
moodycamel::ConcurrentQueue::Block::set_empty
bool set_empty(index_t i)
Definition: concurrentqueue.h:1576
moodycamel::details::std_max_align_t
std::max_align_t std_max_align_t
Definition: concurrentqueue.h:302
moodycamel::details::hash_thread_id
static size_t hash_thread_id(thread_id_t id)
Definition: concurrentqueue.h:461
kReceivePort
static constexpr size_t kReceivePort
Definition: test_udp_client_server.cc:14
nlohmann::json_v3_11_1NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON::detail2::begin
begin_tag begin(T &&...)
LdpcGetMinZc
static size_t LdpcGetMinZc()
Definition: utils_ldpc.h:187
kVerbose
static constexpr bool kVerbose
Definition: test_ldpc_mod.cc:28
moodycamel::ConcurrentQueueDefaultTraits::MAX_SEMA_SPINS
static const int MAX_SEMA_SPINS
Definition: concurrentqueue.h:380
moodycamel::ConcurrentQueue::try_dequeue_from_producer
bool try_dequeue_from_producer(producer_token_t const &producer, U &item)
Definition: concurrentqueue.h:1279
moodycamel::ConcurrentQueue::ImplicitProducer::BlockIndexEntry::key
std::atomic< index_t > key
Definition: concurrentqueue.h:2872
len
uint16_t len
Definition: eth_common.h:62
dodemul.h
Declaration file for the DoDemul class.
scrambler.h
Scramble Class and helper functions.
i
for i
Definition: generate_data.m:107
TEST
TEST(TestDemod256QAM, SoftLoop)
Definition: test_256qam_demod.cc:148
nlohmann::json_v3_11_1NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON::detail::void
j template void())
Definition: json.hpp:4744
moodycamel::ConcurrentQueue::swap_internal
ConcurrentQueue & swap_internal(ConcurrentQueue &other)
Definition: concurrentqueue.h:940
moodycamel::ConcurrentQueue::enqueue
bool enqueue(T const &item)
Definition: concurrentqueue.h:974
moodycamel::ProducerToken::~ProducerToken
~ProducerToken()
Definition: concurrentqueue.h:674
DataGenerator
Building blocks for generating end-to-end or unit test workloads for Agora.
Definition: data_generator.h:21
GetTime::NanoSleep
static void NanoSleep(size_t ns, double freq_ghz)
Sleep for some nanoseconds.
Definition: gettime.h:39
kNoiseLevels
static constexpr float kNoiseLevels[15]
Definition: test_ldpc_baseband.cc:28
Catch::cout
std::ostream & cout()
Demod16qamHardAvx2
void Demod16qamHardAvx2(float *vec_in, uint8_t *vec_out, int num)
Definition: modulation.cc:472
moodycamel::details::nomove_if< false >::eval
static auto eval(U &&x) -> decltype(std::forward< U >(x))
Definition: concurrentqueue.h:534
moodycamel::ConcurrentQueue::ImplicitProducer::BlockIndexEntry
Definition: concurrentqueue.h:2870
moodycamel::ConcurrentQueue::ImplicitProducerKVP
Definition: concurrentqueue.h:3280
moodycamel::ConcurrentQueue::ExplicitProducer::enqueue_bulk
bool enqueue_bulk(It itemFirst, size_t count)
Definition: concurrentqueue.h:2040
nlohmann::json_v3_11_1NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON::detail::hash
std::size_t hash(const BasicJsonType &j)
hash a JSON value
Definition: json.hpp:5867
moodycamel::ConcurrentQueue::FreeList::freeListHead
std::atomic< N * > freeListHead
Definition: concurrentqueue.h:1521
u
Plot Rx waveform for u
Definition: inspect_single_frame.m:108
moodycamel::ConcurrentQueue::nextExplicitConsumerId
std::atomic< std::uint32_t > nextExplicitConsumerId
Definition: concurrentqueue.h:3664
GetTime::Rdtsc
static size_t Rdtsc()
Return the TSC.
Definition: gettime.h:25
moodycamel::ConsumerToken::ConsumerToken
ConsumerToken(ConcurrentQueue< T, Traits > &q)
Definition: concurrentqueue.h:3697
moodycamel::ConcurrentQueue::ProducerBase::dequeue
bool dequeue(U &element)
Definition: concurrentqueue.h:1694
comms-lib.h
Communications Library: a) Generate pilot/preamble sequences b) OFDM modulation.
ModSingle
complex_float ModSingle(int x, Table< complex_float > &mod_table)
Definition: modulation.cc:209
MOODYCAMEL_TRY
#define MOODYCAMEL_TRY
Definition: concurrentqueue.h:179
moodycamel::ConcurrentQueue::ImplicitProducer::get_block_index_index_for_index
size_t get_block_index_index_for_index(index_t index, BlockIndexHeader *&localBlockIndex) const
Definition: concurrentqueue.h:2931
moodycamel::ConcurrentQueue::create
static U * create(A1 &&a1)
Definition: concurrentqueue.h:3630
moodycamel::ConcurrentQueue::ConcurrentQueue
ConcurrentQueue(ConcurrentQueue &&other) noexcept
Definition: concurrentqueue.h:890
UDPRecv
void UDPRecv(const std::string &local_address, uint16_t local_port, const std::string &remote_address, uint16_t remote_port)
Test the connection use case.
Definition: test_udp_client_server.cc:157
fmt::v8::fprintf
auto fprintf(std::FILE *f, const S &fmt, const T &... args) -> int
Definition: printf.h:607
moodycamel::ConcurrentQueue::ConcurrentQueue
ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
Definition: concurrentqueue.h:816
Agora_memory::Alignment_t::kAlign64
@ kAlign64
moodycamel::ConcurrentQueue::ImplicitProducer::rewind_block_index_tail
void rewind_block_index_tail()
Definition: concurrentqueue.h:2918
moodycamel::ProducerToken::ConcurrentQueueTests
friend class ConcurrentQueueTests
Definition: concurrentqueue.h:688
CheckArmaMemoryState
static void CheckArmaMemoryState(arma::uhword state, bool dynamic)
Definition: test_armadillo.cc:13
moodycamel::ConcurrentQueue::ImplicitProducer::nextBlockIndexCapacity
size_t nextBlockIndexCapacity
Definition: concurrentqueue.h:2994
MOODYCAMEL_NOEXCEPT_CTOR
#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)
Definition: concurrentqueue.h:207
UDPComm::Recv
ssize_t Recv(std::byte *buf, size_t len) const
Try to receive up to len bytes in buf by default this will not block.
Definition: udp_comm.cc:372
kFrameOffsets
static constexpr size_t kFrameOffsets[kAntTestNum]
Definition: test_zf_threaded.cc:16
moodycamel::ConcurrentQueue::inner_enqueue_bulk
bool inner_enqueue_bulk(It itemFirst, size_t count)
Definition: concurrentqueue.h:1365
UDPComm::Connect
ssize_t Connect(const std::string &remote_address, uint16_t remote_port) const
The remote_address | remote_port is the address to which datagrams are sent. 1:1 association me<->rem...
Definition: udp_comm.cc:250
moodycamel::ConcurrentQueue::producerCount
std::atomic< std::uint32_t > producerCount
Definition: concurrentqueue.h:3646
Direction::kDownlink
@ kDownlink
Demod16qamSoftAvx2
void Demod16qamSoftAvx2(float *vec_in, int8_t *llr, int num)
Definition: modulation.cc:601
moodycamel::ConcurrentQueue::ExplicitProducer::BlockIndexHeader::prev
void * prev
Definition: concurrentqueue.h:2337
SimdConvertFloat32ToFloat16
static void SimdConvertFloat32ToFloat16(float *out_buf, const float *in_buf, size_t n_elems)
Definition: datatype_conversion.h:576
gen_tag_t::tag_
size_t tag_
Definition: message.h:43
main
int main(int argc, char **argv)
Definition: test_datatype_conversion.cc:324
MasterToWorkerDynamicMaster
void MasterToWorkerDynamicMaster(Config *cfg, moodycamel::ConcurrentQueue< EventData > &event_queue, moodycamel::ConcurrentQueue< EventData > &complete_task_queue)
Definition: test_demul_threaded.cc:23
moodycamel::ConcurrentQueue::ExplicitProducer::new_block_index
bool new_block_index(size_t numberOfFilledSlotsToExpose)
Definition: concurrentqueue.h:2341
moodycamel::details::ceil_to_pow_2
static T ceil_to_pow_2(T x)
Definition: concurrentqueue.h:490
moodycamel::ConcurrentQueue::FreeList
Definition: concurrentqueue.h:1429
moodycamel::ConcurrentQueue::Block::Block
Block()
Definition: concurrentqueue.h:1540
check
uint16_t check
Definition: eth_common.h:69
MOODYCAMEL_CATCH
#define MOODYCAMEL_CATCH(...)
Definition: concurrentqueue.h:180
kMaxItrNum
static constexpr size_t kMaxItrNum
Definition: test_demul_threaded.cc:15
moodycamel::ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD
static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD
Definition: concurrentqueue.h:756
ModSingleUint8
complex_float ModSingleUint8(uint8_t x, Table< complex_float > &mod_table)
Definition: modulation.cc:213
kSnrLevels
static constexpr float kSnrLevels[15]
Definition: test_ldpc_mod.cc:33
moodycamel::ConcurrentQueue::ProducerBase::~ProducerBase
virtual ~ProducerBase()
Definition: concurrentqueue.h:1691
mufft_get_time
static double mufft_get_time(void)
Definition: test_mufft.cc:10
kFrameOffsets
static constexpr size_t kFrameOffsets[kModTestNum]
Definition: test_demul_threaded.cc:19
UDPClient::Send
void Send(const std::string &rem_hostname, uint16_t rem_port, const std::byte *msg, size_t len)
Send one UDP packet to a remote server. The client caches the the remote server's addrinfo after reso...
Definition: udp_client.h:50
moodycamel::details::thread_id_converter::thread_id_numeric_size_t
thread_id_t thread_id_numeric_size_t
Definition: concurrentqueue.h:84
moodycamel::ConcurrentQueue::ConcurrentQueueTests
friend class ConcurrentQueueTests
Definition: concurrentqueue.h:1336
num_workers_ready_atomic
std::atomic< size_t > num_workers_ready_atomic
Definition: test_zf_threaded.cc:18
nlohmann::json_v3_11_1NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON::basic_json
a class to store JSON values
Definition: json.hpp:3367
Config::DemulEventsPerSymbol
size_t DemulEventsPerSymbol() const
Definition: config.h:197
NUM_SYMBOLS
#define NUM_SYMBOLS
Definition: test_256qam_demod.cc:10
moodycamel::ConcurrentQueue::ImplicitProducerHash::prev
ImplicitProducerHash * prev
Definition: concurrentqueue.h:3315
LDPCconfig
Definition: ldpc_config.h:14
bench_mod_64qam
static double bench_mod_64qam(unsigned iterations, unsigned mode)
Definition: test_modulation.cc:136
MOODYCAMEL_NOEXCEPT_ASSIGN
#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr)
Definition: concurrentqueue.h:208
moodycamel::ConcurrentQueue::enqueue
bool enqueue(T &&item)
Definition: concurrentqueue.h:985
moodycamel::ConcurrentQueueDefaultTraits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
Definition: concurrentqueue.h:363
moodycamel::ConcurrentQueue::enqueue_bulk
bool enqueue_bulk(It itemFirst, size_t count)
Definition: concurrentqueue.h:1016
moodycamel::ConcurrentQueue::get_or_add_implicit_producer
ImplicitProducer * get_or_add_implicit_producer()
Definition: concurrentqueue.h:3374
bench_fft_1d
static double bench_fft_1d(unsigned N, unsigned iterations, int direction)
Definition: test_mufft.cc:26
moodycamel::ConcurrentQueue::ImplicitProducerKVP::ImplicitProducerKVP
ImplicitProducerKVP()
Definition: concurrentqueue.h:3285
moodycamel::details::const_numeric_max::value
static const T value
Definition: concurrentqueue.h:294
moodycamel::ConcurrentQueue::try_enqueue_bulk
bool try_enqueue_bulk(It itemFirst, size_t count)
Definition: concurrentqueue.h:1080
main
int main(int argc, char **argv)
Definition: test_concurrent_queue.cc:135
symbols.h
saveData
void saveData(char *filename, complex_float *ptr, int row, int col)
Definition: test_transpose.cc:39
gen_tag_t::FrmSc
static gen_tag_t FrmSc(size_t frame_id, size_t sc_id)
Definition: message.h:117
LdpcGetMaxZc
static size_t LdpcGetMaxZc()
Definition: utils_ldpc.h:190
moodycamel::ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE
static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE
Definition: concurrentqueue.h:760
moodycamel::ConcurrentQueue::initialBlockPoolIndex
std::atomic< size_t > initialBlockPoolIndex
Definition: concurrentqueue.h:3648
ue_num
for ue_num
Definition: parse_all_dl.m:84
spdlog::async_overflow_policy::block
@ block
FastRand::NextU32
uint32_t NextU32()
Definition: utils.h:189
moodycamel::details::max_align_t::y
long long y
Definition: concurrentqueue.h:309
kMaxDataSCs
static constexpr size_t kMaxDataSCs
Definition: symbols.h:283
moodycamel::ConcurrentQueue::Block::dynamicallyAllocated
bool dynamicallyAllocated
Definition: concurrentqueue.h:1659
kNumRows
static constexpr size_t kNumRows
Definition: test_ldpc.cc:28
Run256QamSoftDemod
static void Run256QamSoftDemod(void(*demod_func)(const float *, int8_t *, int), const char *func_desc)
Definition: test_256qam_demod.cc:51
MOODYCAMEL_MAYBE_UNUSED
#define MOODYCAMEL_MAYBE_UNUSED
Definition: concurrentqueue.h:168
kModTestNum
static constexpr size_t kModTestNum
Definition: test_demul_threaded.cc:16
Config::FreqGhz
double FreqGhz() const
Definition: config.h:56
moodycamel::ConcurrentQueue::FreeList::FreeList
FreeList(FreeList &&other)
Definition: concurrentqueue.h:1432
PtrGrid::mat_
std::array< std::array< T *, COLS >, ROWS > mat_
Definition: memory_manage.h:192
moodycamel::ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE
static const size_t IMPLICIT_INITIAL_INDEX_SIZE
Definition: concurrentqueue.h:758
MasterToWorkerDynamicMaster
void MasterToWorkerDynamicMaster(Config *cfg, moodycamel::ConcurrentQueue< EventData > &event_queue, moodycamel::ConcurrentQueue< EventData > &complete_task_queue)
Definition: test_zf_threaded.cc:20
main
int main(int argc, char **argv)
Definition: test_256qam_demod.cc:215
moodycamel::ConcurrentQueue::ExplicitProducer::dequeue
bool dequeue(U &element)
Definition: concurrentqueue.h:1940
MapModToStr
static std::string MapModToStr(size_t mod_order)
Definition: modulation.h:39
moodycamel::ConcurrentQueue::ImplicitProducerKVP::operator=
ImplicitProducerKVP & operator=(ImplicitProducerKVP &&other) noexcept
Definition: concurrentqueue.h:3293
moodycamel::ConcurrentQueue::ImplicitProducer::BlockIndexHeader::tail
std::atomic< size_t > tail
Definition: concurrentqueue.h:2879
moodycamel::ConcurrentQueue::ProducerBase::dequeueOptimisticCount
std::atomic< index_t > dequeueOptimisticCount
Definition: concurrentqueue.h:1729
moodycamel::ConcurrentQueue::ExplicitProducer::pr_blockIndexEntries
BlockIndexEntry * pr_blockIndexEntries
Definition: concurrentqueue.h:2387
moodycamel::ConcurrentQueue::ExplicitProducer::~ExplicitProducer
~ExplicitProducer()
Definition: concurrentqueue.h:1768
main
int main(int argc, char *argv[])
Definition: test_ldpc_mod.cc:56
DataGenerator::Profile
Profile
Definition: data_generator.h:24
moodycamel::ConcurrentQueue
Definition: concurrentqueue.h:416
kMaxModType
static constexpr size_t kMaxModType
Definition: symbols.h:297
extract_version.data
dictionary data
Definition: extract_version.py:8
moodycamel::ConcurrentQueue::ImplicitProducer::enqueue
bool enqueue(U &&element)
Definition: concurrentqueue.h:2472
moodycamel::ConcurrentQueue::Block::set_many_empty
bool set_many_empty(index_t i, size_t count)
Definition: concurrentqueue.h:1595
n
n
Definition: simulate_performance.m:1
moodycamel::ConcurrentQueue::try_dequeue
bool try_dequeue(consumer_token_t &token, U &item)
Definition: concurrentqueue.h:1162
moodycamel::ConcurrentQueue::ImplicitProducer::new_block_index
bool new_block_index()
Definition: concurrentqueue.h:2949
main
int main(int argc, char **argv)
Definition: test_scrambler.cc:162
server_ready
static std::atomic< bool > server_ready
Definition: test_udp_client_server.cc:17
MOODYCAMEL_RETHROW
#define MOODYCAMEL_RETHROW
Definition: concurrentqueue.h:181
moodycamel::ConcurrentQueue::ImplicitProducerKVP::value
ImplicitProducer * value
Definition: concurrentqueue.h:3283
moodycamel::ConcurrentQueue::ImplicitProducerHash::capacity
size_t capacity
Definition: concurrentqueue.h:3313
moodycamel::ConcurrentQueue::FreeList::head_unsafe
N * head_unsafe() const
Definition: concurrentqueue.h:1492
moodycamel::details::circular_less_than
static bool circular_less_than(T a, T b)
Definition: concurrentqueue.h:469
num_workers_ready_atomic
static std::atomic< size_t > num_workers_ready_atomic
Definition: test_demul_threaded.cc:21
moodycamel::ConcurrentQueue::Block::elementsCompletelyDequeued
std::atomic< size_t > elementsCompletelyDequeued
Definition: concurrentqueue.h:1653
Demod256qamSoftLoop
void Demod256qamSoftLoop(const float *vec_in, int8_t *llr, int num)
Definition: modulation.cc:2157
bench_fft_1d_mkl
static double bench_fft_1d_mkl(unsigned N, unsigned iterations)
Definition: test_fft_mkl.cc:40
moodycamel::ConcurrentQueue::Block::elements
details::identity< char[sizeof(T) *BLOCK_SIZE] >::type elements
Definition: concurrentqueue.h:1649
GetTime::MeasureRdtscFreq
static double MeasureRdtscFreq()
Definition: gettime.h:51
ServerConnect
void ServerConnect(const std::string &src_address, const uint16_t src_port, const std::string &dest_address, const uint16_t dest_port)
Definition: test_udp_client_server.cc:117
moodycamel::ConcurrentQueue::FreeList::swap
void swap(FreeList &other)
Definition: concurrentqueue.h:1433
Config::BeamEventsPerSymbol
size_t BeamEventsPerSymbol() const
Definition: config.h:202
moodycamel::details::ConcurrentQueueProducerTypelessBase::token
ProducerToken * token
Definition: concurrentqueue.h:427
kNEntries
const size_t kNEntries
Definition: test_ptr_grid.cc:8
moodycamel::ConcurrentQueue::ProducerBase::ProducerBase
ProducerBase(ConcurrentQueue *parent_, bool isExplicit_)
Definition: concurrentqueue.h:1680
GetTime::GetTime
static double GetTime()
Definition: gettime.h:22
FrameStats::NumULSyms
size_t NumULSyms() const
Definition: framestats.cc:85
GetTime::WorkerRdtsc
static size_t WorkerRdtsc()
Definition: gettime.h:34
moodycamel::details::ConcurrentQueueProducerTypelessBase
Definition: concurrentqueue.h:423
complex_float::real
float real
Definition: test_transpose.cc:23
kNumWorkers
static constexpr size_t kNumWorkers
Definition: test_zf_threaded.cc:11
kMaxTestNum
static constexpr size_t kMaxTestNum
Definition: test_demul_threaded.cc:14
LDPCconfig::ExpansionFactor
uint16_t ExpansionFactor() const
Definition: ldpc_config.h:47
GetTime::CyclesToMs
static double CyclesToMs(size_t cycles, double freq_ghz)
Definition: gettime.h:91
main
int main(int argc, char *argv[])
Definition: test_ldpc_baseband.cc:40
Agora_memory::Alignment_t::kAlign32
@ kAlign32
moodycamel::ConcurrentQueue::ExplicitProducer::pr_blockIndexRaw
void * pr_blockIndexRaw
Definition: concurrentqueue.h:2388
moodycamel::ConcurrentQueue::ImplicitProducer::BlockIndexHeader::index
BlockIndexEntry ** index
Definition: concurrentqueue.h:2881
bench_fft_1d_mkl_out
static double bench_fft_1d_mkl_out(unsigned N, unsigned iterations)
Definition: test_fft_mkl.cc:107
ClientConnect
void ClientConnect(const std::string &src_address, uint16_t src_port, const std::string &dest_address, uint16_t dest_port)
Test the connection use case.
Definition: test_udp_client_server.cc:42
moodycamel::details::deref_noexcept
static auto deref_noexcept(It &it) noexcept -> decltype(*it)
Definition: concurrentqueue.h:542
TEST
TEST(WLAN_Scrambler, fixed_input_scramble_int8_t)
Construct a new TEST object.
Definition: test_scrambler.cc:22
Config
Definition: config.h:26
moodycamel::ConcurrentQueue::implicitProducerHashResizeInProgress
std::atomic_flag implicitProducerHashResizeInProgress
Definition: concurrentqueue.h:3662
moodycamel::details::is_trivially_destructible
Definition: concurrentqueue.h:548
Config::UpdateUlMCS
void UpdateUlMCS(const nlohmann::json &mcs)
Definition: config.cc:747
FastRand
Definition: utils.h:179
ClientSendTo
void ClientSendTo(const std::string &src_address, uint16_t src_port, const std::string &dest_address, uint16_t dest_port)
Test the non-connection use case (SendTo)
Definition: test_udp_client_server.cc:23
run_benchmark_16qam
static void run_benchmark_16qam(unsigned iterations, unsigned mode)
Definition: test_modulation.cc:439
moodycamel::ConcurrentQueue::Block::shouldBeOnFreeList
std::atomic< bool > shouldBeOnFreeList
Definition: concurrentqueue.h:1658
bench_mod_16qam
static double bench_mod_16qam(unsigned iterations, unsigned mode)
Definition: test_modulation.cc:26
std
Definition: json.hpp:5213
LdpcNumInputBits
static size_t LdpcNumInputBits(size_t base_graph, size_t zc)
Definition: utils_ldpc.h:139
moodycamel::ConsumerToken::ConsumerToken
ConsumerToken(ConsumerToken &&other) noexcept
Definition: concurrentqueue.h:703
moodycamel::ConcurrentQueue::add_block_to_free_list
void add_block_to_free_list(Block *block)
Definition: concurrentqueue.h:3050
moodycamel::ConcurrentQueue::ImplicitProducer::ImplicitProducer
ImplicitProducer(ConcurrentQueue *parent_)
Definition: concurrentqueue.h:2408
moodycamel::ConcurrentQueue::enqueue
bool enqueue(producer_token_t const &token, T &&item)
Definition: concurrentqueue.h:1004
moodycamel::ConcurrentQueue::try_dequeue_non_interleaved
bool try_dequeue_non_interleaved(U &item)
Definition: concurrentqueue.h:1147
moodycamel::details::nomove
static T const & nomove(T const &x)
Definition: concurrentqueue.h:515
moodycamel::ConcurrentQueue::FreeList::try_get
N * try_get()
Definition: concurrentqueue.h:1452
moodycamel::ConcurrentQueue::ExplicitProducer::ExplicitProducer
ExplicitProducer(ConcurrentQueue *parent_)
Definition: concurrentqueue.h:1751
moodycamel::ProducerToken::ProducerToken
ProducerToken(ProducerToken &&other) noexcept
Definition: concurrentqueue.h:638
moodycamel::ConcurrentQueue::ImplicitProducerKVP::ImplicitProducerKVP
ImplicitProducerKVP(ImplicitProducerKVP &&other) noexcept
Definition: concurrentqueue.h:3287
dobeamweights.h
Declaration file for the DoBeamWeights class. Zero forcing for one subcarrier.
TEST
TEST(UDPClientServer, PerfIpv4)
Definition: test_udp_client_server.cc:197
moodycamel::details::identity::type
T type
Definition: concurrentqueue.h:254
fft_get_time
static double fft_get_time(void)
Definition: test_fft_mkl.cc:16
moodycamel::ConcurrentQueue::ProducerBase::headIndex
std::atomic< index_t > headIndex
Definition: concurrentqueue.h:1727
Demod256qamSoftAvx2
void Demod256qamSoftAvx2(const float *vec_in, int8_t *llr, int num)
Definition: modulation.cc:2365
kK5GnrNumPunctured
static constexpr size_t kK5GnrNumPunctured
Definition: test_ldpc.cc:27
moodycamel::ConcurrentQueue::ImplicitProducer::enqueue_bulk
bool enqueue_bulk(It itemFirst, size_t count)
Definition: concurrentqueue.h:2611
kSnrLevels
static constexpr float kSnrLevels[15]
Definition: test_ldpc_baseband.cc:31
fmt::v8::detail::copy
OutputIterator copy(const RangeT &range, OutputIterator out)
Definition: ranges.h:26
moodycamel::ConcurrentQueue::size_t
Traits::size_t size_t
Definition: concurrentqueue.h:753
LDPCconfig::NumCbLen
uint32_t NumCbLen() const
Definition: ldpc_config.h:50
moodycamel::ConcurrentQueue::FreeListNode::freeListNext
std::atomic< N * > freeListNext
Definition: concurrentqueue.h:1422
noise
noise
Definition: generate_data_dl.m:131
kMaxTestNum
static constexpr size_t kMaxTestNum
Definition: test_zf_threaded.cc:12
moodycamel::ConcurrentQueue::CanAlloc
@ CanAlloc
Definition: concurrentqueue.h:1338
kCols
static constexpr size_t kCols
Definition: test_ptr_grid.cc:6
run_benchmark_demod
static void run_benchmark_demod(unsigned N, unsigned iterations)
Definition: test_fft_mkl.cc:439
moodycamel::BlockingConcurrentQueue
Definition: concurrentqueue.h:417
moodycamel::ConcurrentQueue::initialImplicitProducerHash
ImplicitProducerHash initialImplicitProducerHash
Definition: concurrentqueue.h:3660
to_string
std::string to_string() const
Definition: eth_common.h:64
SimdConvertFloatToShort
static void SimdConvertFloatToShort(const float *in_buf, short *out_buf, size_t n_elems, size_t n_prefix=0, float scale_down_factor=1.0f)
Definition: datatype_conversion.h:266
ThreadType::kMaster
@ kMaster
main
int main(int argc, char **argv)
Definition: test_transpose.cc:51
LDPCconfig::BaseGraph
uint16_t BaseGraph() const
Definition: ldpc_config.h:46
moodycamel::details::ConcurrentQueueProducerTypelessBase::ConcurrentQueueProducerTypelessBase
ConcurrentQueueProducerTypelessBase()
Definition: concurrentqueue.h:429
Config::BeamBlockSize
size_t BeamBlockSize() const
Definition: config.h:200
moodycamel::details::hash_32_or_64
Definition: concurrentqueue.h:459
moodycamel::details::likely
static bool() likely(bool x)
Definition: concurrentqueue.h:280
moodycamel::ConcurrentQueue::ImplicitProducer::BlockIndexHeader::prev
BlockIndexHeader * prev
Definition: concurrentqueue.h:2882
moodycamel::ConsumerToken::lastKnownGlobalOffset
std::uint32_t lastKnownGlobalOffset
Definition: concurrentqueue.h:733
id
uint16_t id
Definition: eth_common.h:65
config.h
Declaration file for the configuration class which importants json configuration values into class va...
MasterToWorkerStaticWorker
void MasterToWorkerStaticWorker(size_t worker_id, moodycamel::ConcurrentQueue< ItemT > *queue, moodycamel::ProducerToken *ptok)
Definition: test_concurrent_queue.cc:28
kRows
static constexpr size_t kRows
Definition: test_ptr_grid.cc:5
NUM_ITERATIONS
#define NUM_ITERATIONS
Definition: test_256qam_demod.cc:11
load
end load("results.mat") colors
kNumWorkers
static constexpr size_t kNumWorkers
Definition: test_demul_threaded.cc:13
kMaxItrNum
static constexpr size_t kMaxItrNum
Definition: test_zf_threaded.cc:13
moodycamel::ConsumerToken::itemsConsumedFromCurrent
std::uint32_t itemsConsumedFromCurrent
Definition: concurrentqueue.h:734
RandFloatFromShort
float RandFloatFromShort(float min, float max)
Definition: test_ldpc_mod.cc:46
moodycamel::ConcurrentQueue::try_dequeue
bool try_dequeue(U &item)
Definition: concurrentqueue.h:1104
moodycamel::ConsumerToken
Definition: concurrentqueue.h:695
CommsLib::M256ComplexCf32Mult
static __m256 M256ComplexCf32Mult(__m256 data1, __m256 data2, bool conj)
Definition: comms-lib-avx.cc:196
moodycamel::ConsumerToken::currentProducer
details::ConcurrentQueueProducerTypelessBase * currentProducer
Definition: concurrentqueue.h:735
main
int main(int argc, char **argv)
Definition: test_demul_threaded.cc:189
moodycamel::ConsumerToken::operator=
ConsumerToken & operator=(ConsumerToken &&other) noexcept
Definition: concurrentqueue.h:708
moodycamel::ConcurrentQueue::try_get_block_from_initial_pool
Block * try_get_block_from_initial_pool()
Definition: concurrentqueue.h:3039
moodycamel::ConcurrentQueue::try_enqueue
bool try_enqueue(T const &item)
Definition: concurrentqueue.h:1039
PtrCube< kFrameWnd, kMaxSymbols, kMaxUEs, int8_t >
kNumInputBytes
static constexpr size_t kNumInputBytes
Definition: test_scrambler.cc:14
gettime.h
moodycamel::ConcurrentQueue::Block::is_empty
bool is_empty() const
Definition: concurrentqueue.h:1549
moodycamel::ConcurrentQueue::reown_producers
void reown_producers()
Definition: concurrentqueue.h:3265
kMaxUEs
static constexpr size_t kMaxUEs
Definition: symbols.h:289
UDPClient
Definition: udp_client.h:13
moodycamel::ConcurrentQueue::Block::operator[]
T * operator[](index_t idx) noexcept
Definition: concurrentqueue.h:1645
kModBitsNums
static constexpr size_t kModBitsNums[kModTestNum]
Definition: test_demul_threaded.cc:17
bench_ifft_1d_mkl
static double bench_ifft_1d_mkl(unsigned N, unsigned iterations)
Definition: test_fft_mkl.cc:74
ApplyAwgn
static void ApplyAwgn(complex_float *signal, complex_float *output, int len, float snr)
Definition: test_256qam_demod.cc:20
bench_multiply_dim1
static double bench_multiply_dim1(unsigned Nx, unsigned Ny, unsigned iterations)
Definition: test_matrix.cc:67
MasterToWorkerDynamicWorker
void MasterToWorkerDynamicWorker(Config *cfg, size_t worker_id, moodycamel::ConcurrentQueue< EventData > &event_queue, moodycamel::ConcurrentQueue< EventData > &complete_task_queue, moodycamel::ProducerToken *ptok, PtrGrid< kFrameWnd, kMaxUEs, complex_float > &csi_buffers, Table< complex_float > &calib_dl_msum_buffer, Table< complex_float > &calib_ul_msum_buffer, Table< complex_float > &calib_dl_buffer, Table< complex_float > &calib_ul_buffer, PtrGrid< kFrameWnd, kMaxDataSCs, complex_float > &ul_beam_matrices, PtrGrid< kFrameWnd, kMaxDataSCs, complex_float > &dl_beam_matrices, PhyStats *phy_stats, Stats *stats)
Definition: test_zf_threaded.cc:52
moodycamel::details::invalid_thread_id2
static const thread_id_t invalid_thread_id2
Definition: concurrentqueue.h:156
moodycamel::ConcurrentQueueDefaultTraits::size_t
std::size_t size_t
Definition: concurrentqueue.h:323
DataGenerator::GetCommonPilotTimeDomain
std::vector< complex_float > GetCommonPilotTimeDomain() const
Return the time-domain pilot symbol with OfdmCaNum complex floats.
Definition: data_generator.h:179
moodycamel::details::ConcurrentQueueProducerTypelessBase::next
ConcurrentQueueProducerTypelessBase * next
Definition: concurrentqueue.h:425
moodycamel::ConcurrentQueue::implicitProducerHashCount
std::atomic< size_t > implicitProducerHashCount
Definition: concurrentqueue.h:3659
demod_16qam_loop
static void demod_16qam_loop(float *vec_in, uint8_t *vec_out, int ue_num)
Definition: test_fft_mkl.cc:243
fmt::v8::detail::is_signed
std::integral_constant< bool, std::numeric_limits< T >::is_signed||std::is_same< T, int128_t >::value > is_signed
Definition: format.h:883
moodycamel::ConcurrentQueue::ImplicitProducer::BlockIndexHeader::entries
BlockIndexEntry * entries
Definition: concurrentqueue.h:2880
phy_stats.h
Declaration file for the PhyStats class.
kShrtFltConvFactor
static constexpr float kShrtFltConvFactor
Definition: datatype_conversion.h:18
moodycamel::ConcurrentQueue::Block
Definition: concurrentqueue.h:1538
moodycamel::ConcurrentQueue::FreeListNode
Definition: concurrentqueue.h:1417
moodycamel::ConcurrentQueue::FreeList::add
void add(N *node)
Definition: concurrentqueue.h:1438
moodycamel::ConcurrentQueue::producer_token_t
::moodycamel::ProducerToken producer_token_t
Definition: concurrentqueue.h:749
moodycamel::ConcurrentQueue::producerListTail
std::atomic< ProducerBase * > producerListTail
Definition: concurrentqueue.h:3645
ServerRecvFrom
void ServerRecvFrom(const std::string &src_address, uint16_t src_port, const std::string &dest_address, uint16_t dest_port)
Definition: test_udp_client_server.cc:79
moodycamel::ConcurrentQueueDefaultTraits::MAX_SUBQUEUE_SIZE
static const size_t MAX_SUBQUEUE_SIZE
Definition: concurrentqueue.h:374
UDPServer::Recv
ssize_t Recv(std::byte *buf, size_t len) const
Try to receive up to len bytes in buf by default this will not block.
Definition: udp_server.h:55
moodycamel::details::_hash_32_or_64::hash
static std::uint32_t hash(std::uint32_t h)
Definition: concurrentqueue.h:436
moodycamel::ConcurrentQueueDefaultTraits::EXPLICIT_INITIAL_INDEX_SIZE
static const size_t EXPLICIT_INITIAL_INDEX_SIZE
Definition: concurrentqueue.h:353
moodycamel::ConcurrentQueue::FreeListNode::FreeListNode
FreeListNode()
Definition: concurrentqueue.h:1419
kPrintUplinkInformationBytes
static constexpr bool kPrintUplinkInformationBytes
Definition: test_ldpc_baseband.cc:27
gen_tag_t::frame_id_
uint32_t frame_id_
Definition: message.h:32
moodycamel::details::_hash_32_or_64< 1 >::hash
static std::uint64_t hash(std::uint64_t h)
Definition: concurrentqueue.h:450
kMaxAntennas
static constexpr size_t kMaxAntennas
Definition: symbols.h:286
moodycamel::details::max_align_t::x
std_max_align_t x
Definition: concurrentqueue.h:308
moodycamel::ConcurrentQueueDefaultTraits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD
static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD
Definition: concurrentqueue.h:349
printbits
void printbits(uint8_t x)
Definition: test_modulation.cc:260
moodycamel::ConcurrentQueue::try_get_block_from_free_list
Block * try_get_block_from_free_list()
Definition: concurrentqueue.h:3067
moodycamel::ConcurrentQueue::ExplicitProducer::pr_blockIndexSize
size_t pr_blockIndexSize
Definition: concurrentqueue.h:2385
moodycamel::ConcurrentQueue::ProducerBase::tailBlock
Block * tailBlock
Definition: concurrentqueue.h:1732
moodycamel::ConcurrentQueue::ImplicitProducer::get_block_index_entry_for_index
BlockIndexEntry * get_block_index_entry_for_index(index_t index) const
Definition: concurrentqueue.h:2924
kNoiseLevels
static constexpr float kNoiseLevels[15]
Definition: test_ldpc_mod.cc:30
moodycamel::ConcurrentQueue::ConcurrentQueue
ConcurrentQueue(size_t capacity=6 *BLOCK_SIZE)
Definition: concurrentqueue.h:792
LdpcEncodingParityBufSize
static size_t LdpcEncodingParityBufSize(size_t base_graph, size_t zc)
Definition: utils_ldpc.h:174
max
max(y1, y1_1)
moodycamel::ConcurrentQueue::ExplicitProducer::BlockIndexHeader::front
std::atomic< size_t > front
Definition: concurrentqueue.h:2335
moodycamel::details::nomove_if::eval
static T const & eval(T const &x)
Definition: concurrentqueue.h:524
kIpv4Address
static const std::string kIpv4Address
Definition: test_udp_client_server.cc:19
Config::ModOrderBits
size_t ModOrderBits(Direction dir) const
Definition: config.h:247
OFDM
#define OFDM
Definition: test_transpose.cc:17
LDPCconfig::NumCbCodewLen
uint32_t NumCbCodewLen() const
Definition: ldpc_config.h:51
moodycamel::ConcurrentQueue::ExplicitProducer::BlockIndexEntry::base
index_t base
Definition: concurrentqueue.h:2328
FrameStats::GetULSymbol
size_t GetULSymbol(size_t location) const
Definition: framestats.cc:114
moodycamel::ConcurrentQueue::ExplicitProducer::pr_blockIndexSlotsUsed
size_t pr_blockIndexSlotsUsed
Definition: concurrentqueue.h:2384
TEST
TEST(TestZF, VaryingConfig)
Definition: test_zf_threaded.cc:108
ConvertFloatToShort
static void ConvertFloatToShort(const float *in_buf, short *out_buf, size_t n_elems, size_t n_prefix=0, float scale_down_factor=1.0f)
Definition: datatype_conversion.h:237
kBsAntNums
static constexpr size_t kBsAntNums[kAntTestNum]
Definition: test_zf_threaded.cc:15
moodycamel::ConcurrentQueue::enqueue
bool enqueue(producer_token_t const &token, T const &item)
Definition: concurrentqueue.h:995
kMaxTestNum
static constexpr size_t kMaxTestNum
Definition: test_concurrent_queue.cc:8
UDPComm::Send
void Send(const std::string &rem_hostname, uint16_t rem_port, const std::byte *msg, size_t len)
Send one UDP packet to a remote server. The client caches the the remote server's addrinfo after reso...
Definition: udp_comm.cc:270
moodycamel::ConcurrentQueue::ProducerBase::size_approx
size_t size_approx() const
Definition: concurrentqueue.h:1717
flushCache
int flushCache()
Definition: test_transpose.cc:29
concurrentqueue.h
moodycamel::ConcurrentQueue::initialBlockPoolSize
size_t initialBlockPoolSize
Definition: concurrentqueue.h:3650
ItemT::ItemT
ItemT()=default
AdaptBitsForMod
static void AdaptBitsForMod(const uint8_t *bit_seq_in, uint8_t *bytes_out, size_t len, size_t mod_type)
Fill-in the bytes of bytes_out with mod_type bits per byte, taken from the bit sequence bit_seq_in.
Definition: utils_ldpc.h:60
DataGenerator::GetModulation
std::vector< complex_float > GetModulation(const std::vector< int8_t > &encoded_codeword)
Return the output of modulating the encoded codeword.
Definition: data_generator.h:108
moodycamel::details::align_for
static char * align_for(char *ptr)
Definition: concurrentqueue.h:483
moodycamel::ProducerToken::swap
void swap(ProducerToken &other) noexcept
Definition: concurrentqueue.h:653
flushCache
int flushCache()
Definition: test_fft_mkl.cc:22
moodycamel::ConcurrentQueue::FreeList::add_knowing_refcount_is_zero
void add_knowing_refcount_is_zero(N *node)
Definition: concurrentqueue.h:1495
moodycamel::details::unlikely
static bool() unlikely(bool x)
Definition: concurrentqueue.h:281
WorkerToMasterWorkerWithoutToken
void WorkerToMasterWorkerWithoutToken(size_t worker_id, moodycamel::ConcurrentQueue< ItemT > *queue)
Definition: test_concurrent_queue.cc:87
moodycamel::ConcurrentQueue::index_t
Traits::index_t index_t
Definition: concurrentqueue.h:752
TEST
TEST(Modulation, adapt_bits_for_mod_one)
Definition: test_datatype_conversion.cc:15
moodycamel::ConcurrentQueue::requisition_block
Block * requisition_block()
Definition: concurrentqueue.h:3074
moodycamel::swap
void swap(typename ConcurrentQueue< T, Traits >::ImplicitProducerKVP &a, typename ConcurrentQueue< T, Traits >::ImplicitProducerKVP &b) noexcept
Definition: concurrentqueue.h:3729
moodycamel::details::nomove_if
Definition: concurrentqueue.h:521
nlohmann::json_v3_11_1NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON::detail2::end
end_tag end(T &&...)
fmt::v8::detail::digits::result
result
Definition: format-inl.h:640
Table::RandAllocCxFloat
void RandAllocCxFloat(size_t dim1, size_t dim2, Agora_memory::Alignment_t alignment)
Definition: memory_manage.h:69
moodycamel::ConcurrentQueue::recycle_or_create_producer
ProducerBase * recycle_or_create_producer(bool isExplicit, bool &recycled)
Definition: concurrentqueue.h:3211
PtrCube::cube_
std::array< std::array< std::array< T *, DIM3 >, DIM2 >, DIM1 > cube_
The pointer cells.
Definition: memory_manage.h:265
DEFINE_string
DEFINE_string(profile, "random", "The profile of the input user bytes (e.g., 'random', '123')")
fmt::v8::detail::type
type
Definition: core.h:1131
ofdmtxrx.mod_order
int mod_order
Definition: ofdmtxrx.py:397
main
int main(int argc, char **argv)
Definition: test_udp_client_server.cc:252
complex_float::imag
float imag
Definition: test_transpose.cc:24
run_benchmark_1d_ifft
static void run_benchmark_1d_ifft(unsigned N, unsigned iterations)
Definition: test_fft_mkl.cc:373
TEST
TEST(TestArmadillo, CorrectnessStackMemoryState0)
Definition: test_armadillo.cc:61
kMessageSize
static constexpr size_t kMessageSize
Definition: test_udp_client_server.cc:15
utils_ldpc.h
PtrGrid::RandAllocCxFloat
void RandAllocCxFloat(size_t n_entries)
Definition: memory_manage.h:166
moodycamel::ProducerToken::ProducerToken
ProducerToken(ConcurrentQueue< T, Traits > &queue)
Definition: concurrentqueue.h:3679
moodycamel::ConcurrentQueue::ImplicitProducer::INVALID_BLOCK_BASE
static const index_t INVALID_BLOCK_BASE
Definition: concurrentqueue.h:2868
moodycamel::ConcurrentQueue::~ConcurrentQueue
~ConcurrentQueue()
Definition: concurrentqueue.h:837