New upstream version 8.1.0
This commit is contained in:
2500
meta/source/pmq/pmq.cpp
Normal file
2500
meta/source/pmq/pmq.cpp
Normal file
File diff suppressed because it is too large
Load Diff
243
meta/source/pmq/pmq.hpp
Normal file
243
meta/source/pmq/pmq.hpp
Normal file
@@ -0,0 +1,243 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h> // uint64_t etc.
|
||||
#include <stddef.h> // size_t
|
||||
|
||||
// Enqueuer-side counters. Reported as PMQ_Stats::enqueuer.
struct PMQ_Enqueuer_Stats {
    // Number of times the buffer was filled up (the flusher couldn't keep up).
    uint64_t buffer_full_count;
    uint64_t total_messages_enqueued;
    uint64_t total_bytes_enqueued;
};

// Persister-side counters. Reported as PMQ_Stats::persister.
struct PMQ_Persister_Stats {
    uint64_t num_async_flushes;  // calls to pmq_sync()
    uint64_t wakeups;
    uint64_t fsync_calls;
    uint64_t wal_flushes;
    uint64_t wal_flush_bytes;
};

// Combined statistics record, filled in by pmq_get_stats().
struct PMQ_Stats {
    PMQ_Enqueuer_Stats enqueuer;
    PMQ_Persister_Stats persister;
};
|
||||
|
||||
struct PMQ;
|
||||
|
||||
/* Parameters for creating a new queue object (see pmq_create()).
 * If basedir_path exists, try to load existing queue data structures from disk.
 * Otherwise, create the directory and initialize a new queue there.
 * A queue uses approximately the number of bytes that were specified in
 * create_size at the time of creation. (Something like 2 GiB is not
 * unreasonable).
 */
|
||||
// Input to pmq_create().
struct PMQ_Init_Params {
    const char *basedir_path;  // directory holding the queue's on-disk data
    uint64_t create_size;      // size in bytes used when a new queue is created
};
|
||||
|
||||
PMQ *pmq_create(const PMQ_Init_Params *params);
|
||||
|
||||
/* Destroy queue object. This will first flush the remaining buffered messages to disk.
|
||||
*/
|
||||
void pmq_destroy(PMQ *q);
|
||||
|
||||
bool pmq_enqueue_msg(PMQ *q, const void *data, size_t size);
|
||||
bool pmq_sync(PMQ *q);
|
||||
|
||||
void pmq_get_stats(PMQ *q, PMQ_Stats *stats);
|
||||
|
||||
/* Information about persisted data */
|
||||
// Snapshot of persistence positions, as returned by pmq_get_persist_info().
struct PMQ_Persist_Info {
    // Oldest CSN in the chunk store (next chunk to be discarded).
    uint64_t cks_discard_csn;
    // Next MSN to hit the chunk store.
    uint64_t cks_msn;
    // Next MSN to hit the WAL.
    uint64_t wal_msn;
};
|
||||
|
||||
PMQ_Persist_Info pmq_get_persist_info(PMQ *q);
|
||||
|
||||
/*
|
||||
* Get an updated value of the byte range of the underlying data store.
|
||||
* The returned range will be chunk-aligned, but what size chunks are is
|
||||
* currently not exposed in this API.
|
||||
*/
|
||||
|
||||
|
||||
/* */
|
||||
// Result codes returned by the reader functions (seek and read operations).
enum PMQ_Read_Result {
    // The message was successfully read back.
    PMQ_Read_Result_Success = 0,

    // The provided buffer has insufficient size. (The size gets returned back
    // nevertheless.)
    PMQ_Read_Result_Buffer_Too_Small = 1,

    // The requested data is at the current end of the storage area/window. It
    // is the next data that will be written. Try again later.
    // TODO: We might want to introduce mechanisms to block until new data
    // arrives at every level. Currently this has to be implemented in the
    // integration code.
    PMQ_Read_Result_EOF = 2,

    // The requested data is not present. Maybe the requested data was discarded
    // concurrently? It is safe to re-position the cursor and retry.
    PMQ_Read_Result_Out_Of_Bounds = 3,

    // An error was detected by the storage layer.
    PMQ_Read_Result_IO_Error = 4,

    // A problem with the data read back from the storage layer was detected.
    PMQ_Read_Result_Integrity_Error = 5,
};
|
||||
|
||||
// Returns a static, human-readable name for a PMQ_Read_Result value.
// Values that are not one of the known enumerators yield "(invalid value)".
static inline const char *pmq_read_result_string(PMQ_Read_Result readres)
{
    // Names indexed by enumerator value; the order must match PMQ_Read_Result.
    static const char *const names[] = {
        "Success",
        "Buffer_Too_Small",
        "EOF",
        "Out_Of_Bounds",
        "IO_Error",
        "Integrity_Error",
    };
    static_assert(PMQ_Read_Result_Success == 0
            && PMQ_Read_Result_Buffer_Too_Small == 1
            && PMQ_Read_Result_EOF == 2
            && PMQ_Read_Result_Out_Of_Bounds == 3
            && PMQ_Read_Result_IO_Error == 4
            && PMQ_Read_Result_Integrity_Error == 5,
            "name table is out of sync with PMQ_Read_Result");

    // Negative values wrap to huge indices here and are rejected as well.
    const size_t index = (size_t) readres;
    if (index >= sizeof names / sizeof names[0])
        return "(invalid value)";
    return names[index];
}
|
||||
|
||||
|
||||
struct PMQ_Reader;
|
||||
|
||||
PMQ_Reader *pmq_reader_create(PMQ *q);
|
||||
void pmq_reader_destroy(PMQ_Reader *reader);
|
||||
|
||||
PMQ *pmq_reader_get_pmq(PMQ_Reader *reader);
|
||||
|
||||
/* Position cursor at the next incoming message -- or, in other words, at the
|
||||
* current write end of the queue. */
|
||||
PMQ_Read_Result pmq_reader_seek_to_current(PMQ_Reader *reader);
|
||||
|
||||
/* Position cursor at the oldest message (the first message in the chunk
|
||||
* cks_discard). Note that this is rarely a good idea since this message is
|
||||
* likely to be discarded concurrently, so it runs risk of losing sync
|
||||
* immediately or shortly. */
|
||||
PMQ_Read_Result pmq_reader_seek_to_oldest(PMQ_Reader *reader);
|
||||
|
||||
/* Position cursor to given msn. MSNs cannot be directly addressed. The
|
||||
* implementation will have to load multiple chunks to find it.
|
||||
* This also means that the call can fail -- I/O errors etc. can be returned.
|
||||
*/
|
||||
PMQ_Read_Result pmq_reader_seek_to_msg(PMQ_Reader *reader, uint64_t msn);
|
||||
|
||||
/* Read the current message and advance. On success, returns the size of the
|
||||
* message that was read in @out_size and advances to the next message
|
||||
* internally.
|
||||
*/
|
||||
PMQ_Read_Result pmq_read_msg(PMQ_Reader *reader,
|
||||
void *data, size_t size, size_t *out_size);
|
||||
|
||||
uint64_t pmq_reader_get_current_msn(PMQ_Reader *reader);
|
||||
|
||||
/* Attempt to find the MSN of the oldest persisted message.
|
||||
*
|
||||
* Note that the MSN that ends up being returned might already be discarded
|
||||
* once the caller tries to read that message. So calling this function might
|
||||
* not be a good idea.
|
||||
*
|
||||
* Another difficulty, at the implementation level, is that the implementation
|
||||
* needs to read the oldest chunk to know the oldest MSN in that chunk. But the
|
||||
* oldest chunk may be discarded concurrently, so reading it might fail. In
|
||||
* case of a concurrent discard, the implementation will update its
|
||||
* oldest-chunk information and then skip ahead some chunks, trying to read a
|
||||
* slightly newer chunk. This makes the operation more likely to succeed next
|
||||
* time. This continues until either a chunk was read successfully, or we run
|
||||
* out of persisted chunks. In the latter case, the implementation returns the
|
||||
* current "next" MSN. The PMQ always keeps track of this information, so we
|
||||
* can know it without reading a chunk from disk.
|
||||
*/
|
||||
uint64_t pmq_reader_find_old_msn(PMQ_Reader *reader);
|
||||
|
||||
/* Equivalent to pmq_get_persist_info(pmq_reader_get_pmq(reader)); */
|
||||
PMQ_Persist_Info pmq_reader_get_persist_info(PMQ_Reader *reader);
|
||||
|
||||
/* pmq_reader_eof() -- Inexpensive check if there are messages available
|
||||
* currently.
|
||||
* This allows a concurrent reader procedure to synchronize with writers without
|
||||
* having to actually read a message while holding a lock -- which could block
|
||||
* writers for a long time if we have to do actual I/O.
|
||||
*/
|
||||
bool pmq_reader_eof(PMQ_Reader *reader);
|
||||
|
||||
|
||||
|
||||
// C++ RAII wrappers
|
||||
|
||||
// unique_ptr is maybe not precisely what we're looking for. So we're using some boilerplate instead.
|
||||
//#include <memory>
|
||||
//using PMQ_Handle = std::unique_ptr<PMQ, decltype(pmq_destroy)>;
|
||||
//using PMQ_Reader_Handle = std::unique_ptr<PMQ_Reader, decltype(pmq_reader_destroy)>;
|
||||
|
||||
|
||||
// Minimal owning handle around a raw T *.
//
// The handle stores a single pointer and calls Deleter on it when the handle
// is dropped, re-assigned, or destroyed. Handles can be moved but not copied,
// so at most one handle ever releases a given object.
//
// Template parameters:
//   T       - pointee type (e.g. an opaque struct)
//   Deleter - function that releases a T *; only called with non-null pointers
template<typename T, void Deleter(T *)>
class PMQ_Handle_Wrapper
{
    T *m_ptr = nullptr;

public:

    // Returns the held pointer (nullptr if the handle is empty). The handle
    // keeps ownership.
    T *get() const
    {
        return m_ptr;
    }

    // Releases the held object (if any) through Deleter and leaves the handle
    // empty. Safe to call repeatedly.
    void drop()
    {
        if (m_ptr)
        {
            Deleter(m_ptr);
            m_ptr = nullptr;
        }
    }

    operator T *() const // automatic implicit cast to T *
    {
        return m_ptr;
    }

    // True if the handle currently holds an object.
    explicit operator bool() const
    {
        return m_ptr != nullptr;
    }

    // Move assignment: releases the current object, then takes over other's
    // pointer and leaves other empty. The pointer is transferred by hand
    // (instead of std::swap) so this header does not depend on <utility>.
    void operator=(PMQ_Handle_Wrapper&& other)
    {
        if (this == &other)
            return; // self-move must not destroy the held object
        drop();
        m_ptr = other.m_ptr;
        other.m_ptr = nullptr;
    }

    // Takes ownership of ptr after releasing the current object.
    void operator=(T *ptr)
    {
        if (ptr == m_ptr)
            return; // re-assigning the held pointer must not free it
        drop();
        m_ptr = ptr;
    }

    // Not copyable: two handles must never release the same object.
    void operator=(PMQ_Handle_Wrapper const& other) = delete;
    PMQ_Handle_Wrapper(PMQ_Handle_Wrapper const& other) = delete;

    // Move construction: takes over other's pointer and leaves other empty.
    PMQ_Handle_Wrapper(PMQ_Handle_Wrapper&& other)
        : m_ptr(other.m_ptr)
    {
        other.m_ptr = nullptr;
    }

    // Takes ownership of ptr (which may be nullptr for an empty handle).
    explicit PMQ_Handle_Wrapper(T *ptr = nullptr)
        : m_ptr(ptr)
    {
    }

    ~PMQ_Handle_Wrapper()
    {
        drop();
    }
};
|
||||
|
||||
using PMQ_Handle = PMQ_Handle_Wrapper<PMQ, pmq_destroy>;
|
||||
using PMQ_Reader_Handle = PMQ_Handle_Wrapper<PMQ_Reader, pmq_reader_destroy>;
|
||||
350
meta/source/pmq/pmq_base.hpp
Normal file
350
meta/source/pmq/pmq_base.hpp
Normal file
@@ -0,0 +1,350 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <unistd.h>
|
||||
#include <new>
|
||||
|
||||
// macro to align variables to cache line size
|
||||
// There is C++ standardized value of std::hardware_destructive_interference_size.
|
||||
// However that currently produces a warning, probably because of concerns about ABI stability.
|
||||
// So instead I just hardcode a cache line size of 64 bytes for now.
|
||||
// The worst that could happen would be bad performance.
|
||||
//#define __pmq_cache_aligned alignas(std::hardware_destructive_interference_size)
|
||||
#define __pmq_cache_aligned alignas(64)
|
||||
|
||||
// These #define's work for GCC and possibly other compilers. To guarantee
|
||||
// that these definitions are active wherever they could potentially work, I
|
||||
// will define them unconditionally for now, instead of guarding them with
|
||||
// #ifdef __GNUC__.
|
||||
// TODO: try on more compilers and improve compatibility logic!
|
||||
|
||||
#if PMQ_WITH_PROFILING
|
||||
#define __pmq_profiled __attribute__((noinline)) // could consider attribute "noipa" instead of "noinline"
|
||||
#else
|
||||
#define __pmq_profiled
|
||||
#endif
|
||||
|
||||
// "artificial" is used for small inlined wrapper methods, such as operator[].
|
||||
// In theory (and to some extent in practice) the effect should be that the
|
||||
// code that gets inlined to a call site gets attributed to the _call site_
|
||||
// instead of to the definition site of the inlined function -- reducing the
|
||||
// effect of jumping wildly between files when debugging.
|
||||
|
||||
#define __pmq_artificial_method inline __attribute__((always_inline, artificial))
|
||||
#define __pmq_artificial_func static inline __attribute__((always_inline, artificial))
|
||||
|
||||
// Attribute used for logging functions and other printf-style functions. If
|
||||
// these functions are properly annotated, the compiler can check matching
|
||||
// arguments in usage places.
|
||||
|
||||
#define __pmq_formatter(fmt_index, first_arg_index) \
|
||||
__attribute__((format(printf, (fmt_index), (first_arg_index))))
|
||||
|
||||
// treat format warnings as errors for the PMQ
|
||||
// This could be a build system flag but for now I want the change just for
|
||||
// this module in the larger system
|
||||
#pragma GCC diagnostic error "-Wformat"
|
||||
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define pmq_assert(expr)
|
||||
#else
|
||||
// Failure path of pmq_assert(): waits briefly, then reports the failed
// assertion through libc's __assert_fail().
static inline void __pmq_assert_fail(const char *expr, const char *file, int line, const char *func)
{
    // The delay hopefully gives the logger a chance to save the logs.
    // If there was time, we should probably implement the logger in a separate
    // component, communicating using a shared memory mapping.
    const unsigned int grace_period_seconds = 3;
    sleep(grace_period_seconds);

    __assert_fail(expr, file, line, func);
}
|
||||
#define pmq_assert(expr) do { if (! (expr)) { __pmq_assert_fail(#expr, __FILE__, __LINE__, __func__); } } while (0)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Debug-build check that the address in ptr is a multiple of size.
// Compiles to nothing when assert() is disabled (NDEBUG).
__pmq_artificial_func
void __pmq_assert_aligned(const void *ptr, size_t size)
{
    assert((uintptr_t) (ptr) % size == 0);
}
|
||||
|
||||
template<size_t size, typename T>
|
||||
__pmq_artificial_func
|
||||
T __attribute__((aligned(size))) *__pmq_assume_aligned(const T *ptr)
|
||||
{
|
||||
__pmq_assert_aligned(ptr, size);
|
||||
return (T *) __builtin_assume_aligned(ptr, size);
|
||||
}
|
||||
|
||||
|
||||
// Tests whether value has exactly one bit set.
// Precondition (checked with assert): value is non-zero.
static inline bool pmq_is_power_of_2(uint64_t value)
{
    assert(value != 0);
    // Clearing the lowest set bit leaves zero only for powers of two.
    const uint64_t lowest_bit_cleared = value & (value - 1);
    return lowest_bit_cleared == 0;
}
|
||||
|
||||
// Returns the bit mask (value - 1) that selects all bits below a power of two.
// Preconditions (checked with assert): value is non-zero and a power of two.
static inline uint64_t pmq_mask_power_of_2(uint64_t value)
{
    assert(value != 0);
    assert((value & (value - 1)) == 0);
    const uint64_t mask = value - 1;
    return mask;
}
|
||||
|
||||
|
||||
// Binary size helpers: convert a count of KiB / MiB / GiB / TiB / PiB to bytes.
// Arithmetic is plain uint64_t, so results wrap modulo 2^64 on overflow.
static constexpr uint64_t PMQ_Kilobytes(uint64_t count)
{
    return count * (UINT64_C(1) << 10);
}

static constexpr uint64_t PMQ_Megabytes(uint64_t count)
{
    return count * (UINT64_C(1) << 20);
}

static constexpr uint64_t PMQ_Gigabytes(uint64_t count)
{
    return count * (UINT64_C(1) << 30);
}

static constexpr uint64_t PMQ_Terabytes(uint64_t count)
{
    return count * (UINT64_C(1) << 40);
}

static constexpr uint64_t PMQ_Petabytes(uint64_t count)
{
    return count * (UINT64_C(1) << 50);
}
|
||||
|
||||
|
||||
/* Untyped slice class. This is mainly used for slice-copy operations, both for
|
||||
* memory and disk I/O. It saves some boilerplate and is a little bit safer to use.
|
||||
*
|
||||
* Note, we should check if we can replace this with a standard C++ type maybe.
|
||||
* But I personally don't consider this code a liability, and marking the
* methods with __pmq_artificial_method improves the debugging experience.
|
||||
*/
|
||||
class Untyped_Slice
|
||||
{
|
||||
void *m_data;
|
||||
size_t m_size;
|
||||
|
||||
public:
|
||||
__pmq_artificial_method void *data() const { return m_data; }
|
||||
__pmq_artificial_method size_t size() const { return m_size; }
|
||||
|
||||
__pmq_artificial_method
|
||||
Untyped_Slice offset_bytes(size_t offset) const
|
||||
{
|
||||
assert(offset <= m_size);
|
||||
return Untyped_Slice((char *) m_data + offset, m_size - offset);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Untyped_Slice limit_size_bytes(size_t size) const
|
||||
{
|
||||
assert(size <= m_size);
|
||||
return Untyped_Slice(m_data, size);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Untyped_Slice sub_slice_bytes(size_t offset, size_t size) const
|
||||
{
|
||||
return offset_bytes(offset).limit_size_bytes(size);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Untyped_Slice()
|
||||
{
|
||||
m_data = nullptr;
|
||||
m_size = 0;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Untyped_Slice(void *data, size_t size)
|
||||
{
|
||||
m_data = data;
|
||||
m_size = size;
|
||||
}
|
||||
};
|
||||
|
||||
// Sets every byte of dst to zero.
__pmq_artificial_func
void zero_out_slice(Untyped_Slice dst)
{
    void *const start = dst.data();
    const size_t byte_count = dst.size();
    memset(start, 0, byte_count);
}
|
||||
|
||||
// Copies all of src into dst. Both slices must have the same size (asserted).
// Uses memcpy(), so the two ranges must not overlap.
__pmq_artificial_func
void copy_slice(Untyped_Slice dst, Untyped_Slice src)
{
    assert(dst.size() == src.size());
    const size_t byte_count = dst.size();
    memcpy(dst.data(), src.data(), byte_count);
}
|
||||
|
||||
// Copies the first size_bytes bytes of src to the start of dst. Both slices
// must be at least size_bytes long (asserted). Uses memcpy(), so the two
// ranges must not overlap.
__pmq_artificial_func
void copy_slice_bytes(Untyped_Slice dst, Untyped_Slice src, size_t size_bytes)
{
    assert(size_bytes <= dst.size());
    assert(size_bytes <= src.size());

    void *const to = dst.data();
    const void *const from = src.data();
    memcpy(to, from, size_bytes);
}
|
||||
|
||||
// Copies `size` bytes from the raw buffer `data` to the start of `slice`.
// The slice must be at least `size` bytes long (asserted).
__pmq_artificial_func
void copy_to_slice(Untyped_Slice slice, const void *data, size_t size)
{
    assert(slice.size() >= size);
    void *const to = slice.data();
    memcpy(to, data, size);
}
|
||||
|
||||
// Copies the first `size` bytes of `slice` into the raw buffer `data`.
// The slice must be at least `size` bytes long (asserted).
__pmq_artificial_func
void copy_from_slice(void *data, Untyped_Slice slice, size_t size)
{
    assert(slice.size() >= size);
    const void *const from = slice.data();
    memcpy(data, from, size);
}
|
||||
|
||||
|
||||
/*
|
||||
* Typed slice type.
|
||||
*
|
||||
* Note, we should check if we can replace this using std::span (C++20).
|
||||
*/
|
||||
template<typename T>
|
||||
class Slice
|
||||
{
|
||||
T *m_data;
|
||||
size_t m_count;
|
||||
|
||||
public:
|
||||
|
||||
__pmq_artificial_method
|
||||
T *data() const
|
||||
{
|
||||
return m_data;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
size_t count() const
|
||||
{
|
||||
return m_count;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
size_t size_in_bytes() const
|
||||
{
|
||||
return m_count * sizeof (T);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
T get(size_t index) const
|
||||
{
|
||||
assert(index < m_count);
|
||||
return m_data[index];
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
T& at(size_t index)
|
||||
{
|
||||
assert(index < m_count);
|
||||
return m_data[index];
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
T const& at(size_t index) const
|
||||
{
|
||||
assert(index < m_count);
|
||||
return m_data[index];
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Untyped_Slice untyped() const
|
||||
{
|
||||
return Untyped_Slice(m_data, m_count * sizeof (T));
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Slice<T> slice_from(size_t start_index)
|
||||
{
|
||||
assert(start_index <= m_count);
|
||||
return Slice<T>(m_data + start_index, m_count - start_index);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Slice<T> slice_to(size_t count)
|
||||
{
|
||||
assert(count <= m_count);
|
||||
return Slice<T>(m_data, count);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Slice<T> sub_slice(size_t start_index, size_t count)
|
||||
{
|
||||
return slice_from(start_index).slice_to(count);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Slice()
|
||||
{
|
||||
m_data = nullptr;
|
||||
m_count = 0;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Slice(T *data, size_t count)
|
||||
{
|
||||
m_data = data;
|
||||
m_count = count;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
__pmq_artificial_func
|
||||
void copy_to_slice(Slice<T> slice, const void *data, size_t size)
|
||||
{
|
||||
assert(slice.size_in_bytes() >= size);
|
||||
memcpy(slice.data(), data, size);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__pmq_artificial_func
|
||||
void copy_from_slice(void *data, Slice<T> slice, size_t size)
|
||||
{
|
||||
assert(slice.size_in_bytes() >= size);
|
||||
memcpy(data, slice.data(), size);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// A reference type, which wraps a bare pointer. The semantics are the same as
|
||||
// pointer but we don't allow indexing. In other words, the point of this class
|
||||
// is to make clear that it doesn't point to an array but only to a single
|
||||
// (potentially null) object.
|
||||
// In contrast to C++ reference types (T& value), no surprises given value
|
||||
// syntax but pointer semantics.
|
||||
|
||||
template<typename T>
|
||||
class Pointer
|
||||
{
|
||||
T *m_ptr;
|
||||
|
||||
public:
|
||||
|
||||
__pmq_artificial_method
|
||||
T *ptr() const
|
||||
{
|
||||
return m_ptr;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
const T *const_ptr() const
|
||||
{
|
||||
return m_ptr;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Pointer<const T> as_const() const
|
||||
{
|
||||
return Pointer<const T>(m_ptr);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
T *operator->()
|
||||
{
|
||||
return m_ptr;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Pointer(T *ptr)
|
||||
{
|
||||
assert(ptr);
|
||||
m_ptr = ptr;
|
||||
}
|
||||
};
|
||||
851
meta/source/pmq/pmq_common.hpp
Normal file
851
meta/source/pmq/pmq_common.hpp
Normal file
@@ -0,0 +1,851 @@
|
||||
#pragma once
|
||||
|
||||
#include <new> // std::bad_alloc
|
||||
|
||||
#include "pmq_base.hpp"
|
||||
#include "pmq_logging.hpp"
|
||||
#include "pmq_posix_io.hpp"
|
||||
#include "pmq_profiling.hpp"
|
||||
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <dirent.h>
|
||||
|
||||
//
|
||||
// Simple allocating slice class with delayed allocation.
|
||||
//
|
||||
// Why don't I just use std::vector or similar? After all, std::vector is a
|
||||
// well known standard solution that allocates a contiguous buffer of memory.
|
||||
//
|
||||
// I understand this concern, but I've written several simple classes anyway.
|
||||
// Let me try and defend this case of "NIH". (It may or may not convince the
|
||||
// reader).
|
||||
//
|
||||
// This code is much more straightforward and simple compared to STL headers.
|
||||
// It is basically "new" and "delete" wrapped in a simple package together with
|
||||
// operator[] and a way to get a slice to the memory without attached lifetime
|
||||
// semantics.
|
||||
//
|
||||
// Most of the STL classes try to be very generic solutions applicable in a
|
||||
// wide variety of use cases. While using standardized solutions has the
|
||||
// advantages of familiarity, this flexibility and wide applicability comes
|
||||
// with a complexity cost that brings a disadvantage to anyone working with the
|
||||
// codebase.
|
||||
//
|
||||
// Beyond having a fill level separate from allocation size (size() vs
|
||||
// capacity()), std::vector has all sorts of methods and functionality to
|
||||
// support pushing, popping, emplacing, iterators, constructors, destructors,
|
||||
// and so on. It is highly flexible, which shows whenever an actual
|
||||
// instantiated vector type is printed on the terminal, including template
|
||||
// allocator parameter amongst other things.
|
||||
//
|
||||
// All this is ill-fitting for our simple use case. For a queue we just need a
|
||||
// few preallocated buffers. Just for convenience and to get a little safety,
|
||||
// the Alloc_Slice wrapper class was created -- so we can do bounds checking
|
||||
// and get to automatically deallocate the buffers in the destructor.
|
||||
//
|
||||
// The size() field and associated semantics that come with std::vector are
|
||||
// baggage that we can't make use of (we have multiple cursors that wrap around
|
||||
// our buffers in circular fashion). These semantics are not just available,
|
||||
// but are understood by programmers as how std::vector gets used.
|
||||
//
|
||||
// From a mere functionality standpoint this shouldn't be an issue -- We could
|
||||
// make sure that we call .resize(N) only once in the beginning and never call
|
||||
// e.g. push_back(), emplace_back(), reserve(), or similar. This way we'd
|
||||
// essentially be considering the size() as a constant i.e. ignore it.
|
||||
//
|
||||
// However, again, this usage of the type is not guaranteed. The sight of a
|
||||
// std::vector normally suggests pushing (maybe popping), resizing and
|
||||
// reserving, buffer reallocation, pointer/iterator invalidation, and runtime
|
||||
// exceptions.
|
||||
//
|
||||
// With the Alloc_Slice class on the other hand, there is no reallocation and
|
||||
// consequently no iterator invalidation. Exceptions might or might not happen
|
||||
// depending on compile settings -- but only at construction time, i.e. program
|
||||
// startup. Because no reallocations are possible, no pointer invalidation /
|
||||
// iterator invalidation is possible.
|
||||
//
|
||||
// Compared to std::vector and other STL headers, significantly less header
|
||||
// code gets included, so the code compiles quicker. How much quicker? In a
|
||||
// simple test with a single file, adding any of vector, string, map etc.
|
||||
// added around 100ms of compilation time (each). I believe I've seen much worse,
|
||||
// but just multiply 100-400ms by the number of files in a large project and
|
||||
// there may be a good argument for avoiding to include STL headers based on
|
||||
// build time. (TODO: refer to example program).
|
||||
//
|
||||
// In fairness, this problem may be partially solved with precompiled headers,
|
||||
// but those come with some issues too. (build setup, pollution, still have to
|
||||
// compile on each rebuild or precompiled header change).
|
||||
//
|
||||
// With the Alloc_Slice class, methods like operator[] have been marked as
|
||||
// "artificial", meaning it's easier to debug code without jumping all over the
|
||||
// place. With std::vector and similar classes, I believe there is no way, or
|
||||
// no standardized way, to build such that we don't jump around files like wild
|
||||
// when debugging.
|
||||
//
|
||||
// If these arguments haven't been convincing, I'll end it now anyway -- the
|
||||
// text is already much bigger than the actual code.
|
||||
|
||||
template<typename T>
|
||||
class Alloc_Slice
|
||||
{
|
||||
T *m_ptr = nullptr;
|
||||
size_t m_capacity = 0;
|
||||
|
||||
public:
|
||||
|
||||
__pmq_artificial_method
|
||||
T& operator[](size_t i) const
|
||||
{
|
||||
return m_ptr[i];
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
T *data() const
|
||||
{
|
||||
return m_ptr;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
size_t capacity() const
|
||||
{
|
||||
return m_capacity;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Slice<T> slice() const
|
||||
{
|
||||
return Slice<T>(m_ptr, m_capacity);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Untyped_Slice untyped_slice() const
|
||||
{
|
||||
return slice().untyped();
|
||||
}
|
||||
|
||||
void allocate(size_t capacity)
|
||||
{
|
||||
assert(! m_ptr);
|
||||
m_ptr = new T[capacity];
|
||||
m_capacity = capacity;
|
||||
}
|
||||
|
||||
~Alloc_Slice()
|
||||
{
|
||||
delete[] m_ptr;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Posix_FD
|
||||
//
|
||||
// Simple file-descriptor holder. The only important purpose is automatically
|
||||
// closing the fd in the destructor. Setting the fd can happen in the
|
||||
// constructor or be delayed. A new fd can be set after closing the old one.
|
||||
// The fd can be retrieved using the .get() method. There are no other methods
|
||||
// defined, the point here is not to make an abstraction over FDs but just to
|
||||
// auto-close it.
|
||||
//
|
||||
// There is not much more to say. A concern that was brought up was that it would be
|
||||
// better to use an existing class. Again, it's important to note that we're
|
||||
// not trying to add some (probably ill-defined) abstraction. The fact that
|
||||
// this class stores fds is not hidden and there isn't any I/O functionality
|
||||
// contained.
|
||||
//
|
||||
// Given this, I wasn't sure what existing class to use that does the same
|
||||
// thing. This Posix_FD class was quick and easy to write and I hope it is easy
|
||||
// to read too.
|
||||
//
|
||||
// Another concern was that we shouldn't use close() directly here, but instead
|
||||
// use an abstraction (from an existing library) that papers over platform
|
||||
// differences such that the code can work on e.g. Windows too. (Windows has a
|
||||
// POSIX FS layer as well but the code probably wouldn't work without extra
|
||||
// work and handling of subtle differences).
|
||||
//
|
||||
// I can understand this concern, however BeeGFS can not be easily ported to
|
||||
// e.g. Windows anyway, and this has never been a declared goal of the project.
|
||||
// BeeGFS currently can't build on Windows and probably never will.
|
||||
//
|
||||
// The usage code currently makes non-trivial use of advanced POSIX and Linux
|
||||
// functions, such as openat(), fsync(), mmap(), pread(), pwrite(). sendfile()
|
||||
// was used earlier, and might come back. We rely on Posix file permissions
|
||||
// too, and on certain semantics like for example O_CREAT | O_EXCL during file
|
||||
// creation.
|
||||
//
|
||||
// I'm not aware of a better API that is more portable while providing the same
|
||||
// functionality.
|
||||
//
|
||||
// Also, papering over platform differences may be harder than it initially
|
||||
// sounds as soon as good performance and thus good control and good error
|
||||
// handling is a requirement. To be portable, special handling of platform
|
||||
// idiosyncracies might be required, and the architecture would have to change
|
||||
// anyway: away from synchronous function calls which would make the
|
||||
// abstraction leak into the core code, and towards a more asynchronous model
|
||||
// that is better decoupled from the core code.
|
||||
//
|
||||
// It was proposed that std::ifstream / std::ofstream (or similar standardized
|
||||
// class) could be used instead. std::ifstream in particular would be a bad fit
|
||||
// since it is a very generic class that comes with buffering and formatting by
|
||||
// default. I can't easily see how to replace the calls I listed above using
|
||||
// std::ifstream. Even if it's possible, the result may be more complicated /
|
||||
// require use of the underlying Posix FD anyway / be less clear / be more code
|
||||
// / require to give up some control over syscalls etc. ifstream uses
|
||||
// exceptions and has facilities such as formatting that aren't needed, but the
|
||||
// presence of this attached functionality would make the purpose less clear
|
||||
// IMO.
|
||||
//
|
||||
|
||||
class Posix_FD
|
||||
{
|
||||
int m_fd = -1;
|
||||
|
||||
public:
|
||||
|
||||
__pmq_artificial_method
|
||||
int get()
|
||||
{
|
||||
return m_fd;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
bool valid()
|
||||
{
|
||||
return m_fd != -1;
|
||||
}
|
||||
|
||||
int close_fd()
|
||||
{
|
||||
int ret = 0;
|
||||
if (m_fd != -1)
|
||||
{
|
||||
ret = close(m_fd);
|
||||
m_fd = -1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
void set(int fd)
|
||||
{
|
||||
assert(m_fd == -1);
|
||||
m_fd = fd;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
void operator=(int fd)
|
||||
{
|
||||
set(fd);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Posix_FD()
|
||||
{
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Posix_FD(int fd)
|
||||
{
|
||||
set(fd);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
~Posix_FD()
|
||||
{
|
||||
close_fd();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// Libc_DIR
|
||||
//
|
||||
// Similar to Posix_FD, but for libc DIR * handles. Same rationale for why I've
|
||||
// written this applies as for Posix_FD.
|
||||
//
|
||||
// This class is currently not used so could be removed.
|
||||
//
|
||||
|
||||
class Libc_DIR
|
||||
{
|
||||
DIR *m_dir = nullptr;
|
||||
|
||||
public:
|
||||
|
||||
__pmq_artificial_method
|
||||
bool valid()
|
||||
{
|
||||
return m_dir != nullptr;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
DIR *get()
|
||||
{
|
||||
return m_dir;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
void set(DIR *dir)
|
||||
{
|
||||
assert(m_dir == nullptr);
|
||||
m_dir = dir;
|
||||
}
|
||||
|
||||
void close_dir()
|
||||
{
|
||||
if (m_dir)
|
||||
{
|
||||
closedir(m_dir);
|
||||
m_dir = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
void operator=(DIR *dir)
|
||||
{
|
||||
set(dir);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Libc_DIR()
|
||||
{
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Libc_DIR(DIR *dir)
|
||||
{
|
||||
m_dir = dir;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
~Libc_DIR()
|
||||
{
|
||||
close_dir();
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
// MMap_Region
|
||||
//
|
||||
// Similar to Posix_FD, but for memory mappings.
|
||||
//
|
||||
// On destruction, unmaps the mapped region using munmap().
|
||||
//
|
||||
|
||||
class MMap_Region
|
||||
{
|
||||
void *m_ptr = MAP_FAILED;
|
||||
size_t m_length = 0;
|
||||
|
||||
public:
|
||||
|
||||
__pmq_artificial_method
|
||||
void *get() const
|
||||
{
|
||||
return m_ptr;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Untyped_Slice untyped_slice() const
|
||||
{
|
||||
return Untyped_Slice(m_ptr, m_length);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
bool valid()
|
||||
{
|
||||
return m_ptr != MAP_FAILED;
|
||||
}
|
||||
|
||||
void close_mapping()
|
||||
{
|
||||
if (m_ptr != MAP_FAILED)
|
||||
{
|
||||
if (munmap(m_ptr, m_length) == -1)
|
||||
{
|
||||
// should not happen. Simply printing the error for now
|
||||
pmq_perr_ef(errno, "WARNING: munmap() failed");
|
||||
}
|
||||
m_ptr = MAP_FAILED;
|
||||
m_length = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// like mmap but returns whether successful
|
||||
bool create(void *addr, size_t newlength, int prot, int flags,
|
||||
int fd, off_t offset)
|
||||
{
|
||||
assert(m_ptr == MAP_FAILED);
|
||||
void *newptr = mmap(addr, newlength, prot, flags, fd, offset);
|
||||
if (newptr == MAP_FAILED)
|
||||
return false;
|
||||
m_ptr = newptr;
|
||||
m_length = newlength;
|
||||
return true;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
~MMap_Region()
|
||||
{
|
||||
close_mapping();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Mutex_Protected
|
||||
//
|
||||
// Simple wrapper class that protects a data item with a mutex.
|
||||
// The load() and store() methods implement thread-synchronized read and write
|
||||
// access to the data item by locking the resource with a mutex during the
|
||||
// operation.
|
||||
//
|
||||
// A class like Folly::Synchronized might replace this. But again, this was
|
||||
// very easy to write and is extremely small. Pulling in a large dependency
|
||||
// just for that might not be justified. Also, having our own class allows
|
||||
// choosing the mutex type. For example, if we want to profile mutexes using
|
||||
// the Tracy frame profiler, we need to use Tracy's mutex wrappers (here,
|
||||
// hidden in the PMQ_PROFILED_MUTEX wrapper). While Folly::Synchronized supports
|
||||
// custom mutexes, one would need to understand and implement "the extended
|
||||
// protocol implemented in folly/synchronized/Lock.h".
|
||||
//
|
||||
// Upon quick browsing of the 1000 lines in Lock.h, it isn't immediately clear
|
||||
// what that protocol entails and how much work it would be (if any) to wrap
|
||||
// our own mutex type (which is potentially a wrap of std::mutex already) to
|
||||
// conform to that protocol.
|
||||
//
|
||||
// Maybe there is something in the C++ standard that is suited as a
|
||||
// replacement?
|
||||
//
|
||||
// Maybe there is, but I consider it much easier to just write 2 methods
|
||||
// totalling 4 straightforward lines of code...
|
||||
//
|
||||
|
||||
template<typename T>
|
||||
class Mutex_Protected
|
||||
{
|
||||
PMQ_PROFILED_MUTEX(m_mutex);
|
||||
T m_value;
|
||||
|
||||
public:
|
||||
|
||||
void store(T value)
|
||||
{
|
||||
PMQ_PROFILED_LOCK(lock_, m_mutex);
|
||||
m_value = value;
|
||||
}
|
||||
|
||||
T load()
|
||||
{
|
||||
PMQ_PROFILED_LOCK(lock_, m_mutex);
|
||||
return m_value;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* String "slice" that can be passed around. No lifetime semantics or
|
||||
* unexpected copying etc.
|
||||
*
|
||||
* We could use std::string_view instead, but that is a templated type. The
|
||||
* idea of PMQ_String is to wrap just a char-pointer with a size, and nothing
|
||||
* more, to have a package that one can ship around. We mostly use strings for
|
||||
* printf-style formatting and to open files, and we don't need or want any
|
||||
* more complicated semantics than that.
|
||||
*/
|
||||
// Non-owning (pointer, length) pair describing a character buffer.
struct PMQ_String
{
   // First character of the string. The struct never frees this pointer.
   const char *buffer;

   // Number of characters in the string.
   size_t size;
};
|
||||
|
||||
/*
|
||||
* Simple string "holder" class that allocates and frees its buffer. The
|
||||
* contained string is immutable once constructed. But a new one can be
|
||||
* "swapped" in by dropping the old string and creating a new one.
|
||||
*
|
||||
* Is this a case of NIH when there is std::string? Maybe, but basically the
|
||||
* same arguments as for Alloc_Slice and the other classes above apply.
|
||||
*
|
||||
* std::string
|
||||
*
|
||||
* - is somewhat slow to compile
|
||||
* - Unexpected allocations / copies (and thus exceptions as well) can happen
|
||||
* very easily, without anyone noticing -- For example, it's as easy as
|
||||
* writing "auto x = y" instead of "auto& x = y".
|
||||
* - Apart from exceptions and copies / resizes, appending, there is more
|
||||
* complexity that we don't need and don't want and that would actually be a
|
||||
* misfit for our project. Ugly error messages with huge types (...
|
||||
* std::basic_char ... etc.) is only a small symptom of this.
|
||||
*/
|
||||
class PMQ_Owned_String
|
||||
{
|
||||
PMQ_String m_string = {};
|
||||
|
||||
public:
|
||||
|
||||
bool valid() const
|
||||
{
|
||||
return m_string.buffer != nullptr;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
PMQ_String get() const
|
||||
{
|
||||
return m_string;
|
||||
}
|
||||
|
||||
void drop()
|
||||
{
|
||||
// Checking only for clarity. free() and the rest of the code would work
|
||||
// with a null buffer too.
|
||||
if (m_string.buffer != nullptr)
|
||||
{
|
||||
free((void *) m_string.buffer);
|
||||
m_string.buffer = nullptr;
|
||||
m_string.size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void set(const char *buffer)
|
||||
{
|
||||
assert(! m_string.buffer);
|
||||
char *copy = strdup(buffer);
|
||||
if (copy == nullptr)
|
||||
{
|
||||
// is an exception what we want / need?
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
m_string.buffer = copy;
|
||||
m_string.size = strlen(buffer);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
PMQ_Owned_String()
|
||||
{
|
||||
m_string.buffer = nullptr;
|
||||
m_string.size = 0;
|
||||
}
|
||||
|
||||
~PMQ_Owned_String()
|
||||
{
|
||||
drop();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* SNs (sequence numbers)
|
||||
*
|
||||
* Sequence numbers, and the ringbuffers that build on them, are a core concept
|
||||
* of how the PMQ works.
|
||||
*
|
||||
* I believe they are pretty much what is elsewhere known as "LMAX Disruptor"
|
||||
* (google it).
|
||||
*
|
||||
* Sequence numbers are 64-bit unsigned integers that can wraparound (but this
|
||||
* is only theoretical -- wraparound is probably completely untested since
|
||||
* 64-bit numbers don't overflow easily in practice).
|
||||
*
|
||||
* Ringbuffers have a number of slots that is 2^N for some N. SN's are mapped
|
||||
* to slots with wrap-around in the ringbuffer's 2^N slots by using the lowest
|
||||
* N bits of the SN to index into the slots array.
|
||||
*
|
||||
* The SN templated class provides some type safety -- the Tag type is a
|
||||
* "phantom tag" (can be implemented by making a new "empty" class) that
|
||||
* prevents indexing into a ringbuffer using a mismatching sequence number. For
|
||||
* example, we have a ringbuffer of input-slots that should be indexed by *slot
|
||||
* sequence numbers* (SSNs). And we have a ringbuffer of chunks that should be
|
||||
* indexed by *chunk sequence numbers* (CSNs). The on-disk chunk store is
|
||||
* another kind of ringbuffer that works with the same principle of wrapping
|
||||
* around automatically.
|
||||
*
|
||||
* We also track *message sequence numbers* (MSNs) but we don't use them for
|
||||
* indexing, only for binary search.
|
||||
*
|
||||
* Mathematically, SNs form an affine space. This is like a vector space but
|
||||
* without a designated origin (pls forgive me if what I write here is slightly
|
||||
* incorrect as far as mathematics is concerned. Only the idea matters). There
|
||||
* is a 0 value, but it is not meaningfully different compared to any other
|
||||
* value.
|
||||
*
|
||||
* One can subtract two sequence numbers to get a distance (represented as bare
|
||||
* uint64_t), and one can add a distance to a sequence number to get a new
|
||||
* sequence number. However, unlike a vector space with designated 0, one can
|
||||
* not add two sequence numbers meaningfully (SN<T> has operator+(uint64_t d)
|
||||
* but no operator+(SN<T>& other)).
|
||||
*/
|
||||
|
||||
template<typename Tag>
|
||||
class SN
|
||||
{
|
||||
uint64_t m_value;
|
||||
|
||||
public:
|
||||
|
||||
explicit SN(uint64_t value)
|
||||
{
|
||||
m_value = value;
|
||||
}
|
||||
|
||||
// Some C++ trivia following. In most cases you can ignore this and just use
|
||||
// the class similar to primitive integers.
|
||||
//
|
||||
// Here we specify an *explicitly-defaulted default-constructor*. This will
|
||||
// allow us to initialize the object with undefined (garbage) value if we
|
||||
// want so.
|
||||
//
|
||||
// Explanation: Since we have explicitly specified the constructor with 1
|
||||
// argument already, there wouldn't be an implicit default constructor (a
|
||||
// constructor with no arguments). To get a default constructor, we need to
|
||||
// explicitly specify one. We need a default constructor (no constructor
|
||||
// arguments) if we want to write
|
||||
//
|
||||
// SN sn;
|
||||
//
|
||||
// For simple data types (like SN), we typically want the above line to
|
||||
// leave the object's members uninitialized (garbage values). While this is
|
||||
// in some ways dangerous, it can be simpler especially for objects where
|
||||
// zero-initialization isn't very convenient or meaningful. Leaving values
|
||||
// uninitialized in the default constructor also allows the compiler to
|
||||
// catch bugs in some situations when the user unintentionally forgot to
|
||||
// specify an explicit value.
|
||||
//
|
||||
// Note a gotcha: There is a difference between an empty default constructor
|
||||
//
|
||||
// SN() {}
|
||||
//
|
||||
// and an (explicitly or implicitly) defaulted default constructor:
|
||||
//
|
||||
// SN() = default;
|
||||
//
|
||||
// If we use the class like this:
|
||||
//
|
||||
// SN x {};
|
||||
// SN y = SN(); // or like this
|
||||
// SN z = {}; // or like this...
|
||||
//
|
||||
// then x will contain garbarge with the empty default constructor, but will
|
||||
// be zero-initialized with the (explicitly-) defaulted default constructor.
|
||||
// We'd typically want zero initialization with this syntax.
|
||||
|
||||
SN() = default;
|
||||
|
||||
__pmq_artificial_method
|
||||
uint64_t value() const
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
void operator++()
|
||||
{
|
||||
m_value++;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
void operator++(int)
|
||||
{
|
||||
m_value++;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
SN operator+=(uint64_t d)
|
||||
{
|
||||
m_value += d;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
SN& operator-=(uint64_t d)
|
||||
{
|
||||
m_value -= d;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
SN operator+(uint64_t d) const
|
||||
{
|
||||
return SN(m_value + d);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
SN operator-(uint64_t d) const
|
||||
{
|
||||
return SN(m_value - d);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
uint64_t operator-(SN other) const
|
||||
{
|
||||
return m_value - other.m_value;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
bool operator==(SN other) const
|
||||
{
|
||||
return m_value == other.m_value;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
bool operator!=(SN other) const
|
||||
{
|
||||
return m_value != other.m_value;
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* COMPARING SEQUENCE NUMBERS
|
||||
* ==========================
|
||||
*
|
||||
* Since sequence numbers wrap around (in theory, when 64 bits overflow) they
|
||||
* have no natural ordering.
|
||||
*
|
||||
* However, in practice, sequence numbers are used to index in much smaller
|
||||
* buffer, and at any given time there is only a small window of sequence
|
||||
* numbers. It's a sliding window, but a window still.
|
||||
*
|
||||
* So, admitting that the sequence numbers in a given window may wraparound,
|
||||
* back to 0, we can still assume that they never "overtake" each other.
|
||||
* We can subtract two numbers using unsigned arithmetic and determine their
|
||||
* relative ordering from the result. Centering our worldview at a number x, we
|
||||
* divide the space of uint64_t numbers into those that are less than x (x -
|
||||
* 2^63 to x) and those that are greater than x (x to x + 2^63).
|
||||
*
|
||||
* Note that this relation is not transitive (x <= y && y <= z does not imply x
|
||||
* <= z), and not antisymmetric -- (x + 2^63) is both greater and less than x.
|
||||
* So it's not a true ordering relation, but in practice we can use it to
|
||||
* reliably compare items by "age".
|
||||
*
|
||||
* The value 1 should be considered greater than UINT64_MAX, since 1 -
|
||||
* UINT64_MAX == 2. Conversely, UINT64_MAX is less than 1 since UINT64_MAX - 1
|
||||
* equals (UINT64_MAX - 1), which is a huge distance (more than 2^63).
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
// Comparing bare uint64_t sequence values.
|
||||
|
||||
// Wraparound-aware "a < b" on bare sequence values: true if b is between 1 and
// 2^63 steps ahead of a (computed with unsigned wraparound).
__pmq_artificial_func
bool _sn64_lt(uint64_t a, uint64_t b)
{
   const uint64_t half_range = UINT64_MAX / 2;
   const uint64_t steps_past_a = b - (a + 1);
   return steps_past_a <= half_range;
}
|
||||
|
||||
// Wraparound-aware "a <= b" on bare sequence values: true if b is between 0
// and 2^63 - 1 steps ahead of a (computed with unsigned wraparound).
__pmq_artificial_func
bool _sn64_le(uint64_t a, uint64_t b)
{
   const uint64_t half_range = UINT64_MAX / 2;
   const uint64_t steps_ahead = b - a;
   return steps_ahead <= half_range;
}
|
||||
|
||||
// Wraparound-aware "a >= b" on bare sequence values: true if a is between 0
// and 2^63 - 1 steps ahead of b (computed with unsigned wraparound).
__pmq_artificial_func
bool _sn64_ge(uint64_t a, uint64_t b)
{
   const uint64_t half_range = UINT64_MAX / 2;
   const uint64_t steps_ahead = a - b;
   return steps_ahead <= half_range;
}
|
||||
|
||||
// Wraparound-aware "a > b" on bare sequence values: true if a is between 1 and
// 2^63 steps ahead of b (computed with unsigned wraparound).
__pmq_artificial_func
bool _sn64_gt(uint64_t a, uint64_t b)
{
   const uint64_t half_range = UINT64_MAX / 2;
   const uint64_t steps_past_b = a - (b + 1);
   return steps_past_b <= half_range;
}
|
||||
|
||||
|
||||
|
||||
// Comparing type-safe "tagged" SN values
|
||||
|
||||
template<typename Tag>
|
||||
__pmq_artificial_func
|
||||
bool sn64_lt(SN<Tag> a, SN<Tag> b)
|
||||
{
|
||||
return b - (a + 1) <= UINT64_MAX / 2;
|
||||
}
|
||||
|
||||
template<typename Tag>
|
||||
__pmq_artificial_func
|
||||
bool sn64_le(SN<Tag> a, SN<Tag> b)
|
||||
{
|
||||
return b - a <= UINT64_MAX / 2;
|
||||
}
|
||||
|
||||
template<typename Tag>
|
||||
__pmq_artificial_func
|
||||
bool sn64_ge(SN<Tag> a, SN<Tag> b)
|
||||
{
|
||||
return a - b <= UINT64_MAX / 2;
|
||||
}
|
||||
|
||||
template<typename Tag>
|
||||
__pmq_artificial_func
|
||||
bool sn64_gt(SN<Tag> a, SN<Tag> b)
|
||||
{
|
||||
return a - (b + 1) <= UINT64_MAX / 2;
|
||||
}
|
||||
|
||||
template<typename Tag>
|
||||
__pmq_artificial_func
|
||||
bool sn64_inrange(SN<Tag> sn, SN<Tag> lo, SN<Tag> hi)
|
||||
{
|
||||
return sn - lo <= hi - lo;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Ringbuffer containing a buffer (power-of-2 size) of element of type V. It
|
||||
// can be "indexed" using SN's of matching type.
|
||||
|
||||
template<typename Tag, typename V>
|
||||
class Ringbuffer
|
||||
{
|
||||
using K = SN<Tag>;
|
||||
|
||||
V *m_ptr = nullptr;
|
||||
size_t m_count = 0;
|
||||
|
||||
public:
|
||||
|
||||
__pmq_artificial_method
|
||||
uint64_t slot_count() const
|
||||
{
|
||||
return m_count;
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
void reset(Slice<V> slice)
|
||||
{
|
||||
assert(pmq_is_power_of_2(slice.count()));
|
||||
m_ptr = slice.data();
|
||||
m_count = slice.count();
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Slice<V> as_slice() const
|
||||
{
|
||||
return Slice<V>(m_ptr, m_count);
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
const V *get_slot_for(K k) const
|
||||
{
|
||||
return &m_ptr[k.value() & (m_count - 1)];
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
V *get_slot_for(K k)
|
||||
{
|
||||
return &m_ptr[k.value() & (m_count - 1)];
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Ringbuffer()
|
||||
{
|
||||
}
|
||||
|
||||
__pmq_artificial_method
|
||||
Ringbuffer(V *ptr, uint64_t size)
|
||||
{
|
||||
reset(ptr, size);
|
||||
}
|
||||
};
|
||||
241
meta/source/pmq/pmq_logging.cpp
Normal file
241
meta/source/pmq/pmq_logging.cpp
Normal file
@@ -0,0 +1,241 @@
|
||||
#include "pmq_logging.hpp"
|
||||
#include "pmq_common.hpp"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
|
||||
|
||||
// The logging module can either print to stderr or use the BeeGFS metadata
|
||||
// server's logging backend.
|
||||
|
||||
#ifdef PMQ_TEST
|
||||
# ifndef PMQ_LOG_LEVEL
|
||||
# error PMQ_LOG_LEVEL must be defined when compiling test case
|
||||
# endif
|
||||
#define INTEGRATE_WITH_METADATA_SERVER 0
|
||||
#else
|
||||
#define INTEGRATE_WITH_METADATA_SERVER 1
|
||||
#endif
|
||||
|
||||
|
||||
#if INTEGRATE_WITH_METADATA_SERVER
|
||||
// Integrate into metadata server
|
||||
#include <common/app/log/Logger.h>
|
||||
#endif
|
||||
|
||||
struct Log_Buffer
|
||||
{
|
||||
PMQ_PROFILED_MUTEX(mutex);
|
||||
PMQ_PROFILED_CONDVAR(writeable); // reader => writer
|
||||
PMQ_PROFILED_CONDVAR(readable); // writer => reader. Corresponding mutex is in Log_Message
|
||||
Alloc_Slice<Log_Message> msgs;
|
||||
size_t capacity = 0;
|
||||
size_t writepos = 0;
|
||||
size_t readpos = 0;
|
||||
|
||||
Log_Buffer()
|
||||
{
|
||||
// TODO: this costs a lot of memory. However, a previous setting of 64
|
||||
// wasn't enough for high-frequency logging. We need more dynamic and
|
||||
// judicious memory allocation.
|
||||
capacity = 1024;
|
||||
msgs.allocate(capacity);
|
||||
}
|
||||
};
|
||||
|
||||
// Appends a copy of *input to the buffer. Blocks while the buffer is full.
static void log_buffer_write(Log_Buffer *logbuf, Log_Message const *input)
{
   PMQ_PROFILED_UNIQUE_LOCK(lock, logbuf->mutex);

   // Full means: `capacity` messages have been written but not yet read.
   for (;;)
   {
      const size_t unread = logbuf->writepos - logbuf->readpos;
      if (unread != logbuf->capacity)
         break;
      logbuf->writeable.wait(lock);
   }

   const size_t slot_index = logbuf->writepos & (logbuf->capacity - 1);
   Log_Message *slot = &logbuf->msgs[slot_index];

   // Only the first `size` bytes of the payload are copied.
   slot->size = input->size;
   memcpy(slot->data, input->data, input->size);
   logbuf->writepos += 1;

   // hoping that this is cheap: otherwise we should track the number of
   // readers and check it before calling notify_one()
   logbuf->readable.notify_one();
}
|
||||
|
||||
// Copies the oldest unread message into *output and consumes it.
// Takes no lock itself: the callers in this file hold logbuf->mutex and have
// checked that the buffer is not empty.
static void _log_buffer_read(Log_Buffer *logbuf, Log_Message *output)
{
   const size_t slot_index = logbuf->readpos & (logbuf->capacity - 1);
   const Log_Message *slot = &logbuf->msgs[slot_index];

   // Only the first `size` bytes of the payload are copied.
   output->size = slot->size;
   memcpy(output->data, slot->data, slot->size);
   logbuf->readpos += 1;

   // hoping that this is cheap: otherwise we should track the number of
   // writers and check it before calling notify_one()
   logbuf->writeable.notify_one();
}
|
||||
|
||||
// Removes the oldest message from the buffer into *output.
// Blocks until a message is available.
static void log_buffer_read(Log_Buffer *logbuf, Log_Message *output)
{
   PMQ_PROFILED_UNIQUE_LOCK(lock, logbuf->mutex);

   // Equal positions mean the buffer is empty.
   for (;;)
   {
      if (logbuf->writepos != logbuf->readpos)
         break;
      logbuf->readable.wait(lock);
   }

   _log_buffer_read(logbuf, output);
}
|
||||
|
||||
// Non-blocking read: if a message is available, moves the oldest one into
// *output and returns true. Otherwise returns false and leaves *output alone.
static bool log_buffer_try_read(Log_Buffer *logbuf, Log_Message *output)
{
   PMQ_PROFILED_LOCK(lock, logbuf->mutex);

   const bool have_message = logbuf->writepos != logbuf->readpos;

   if (have_message)
      _log_buffer_read(logbuf, output);

   return have_message;
}
|
||||
|
||||
// Like log_buffer_read(), but waits at most `millis` milliseconds for a
// message. Returns true and fills *output if a message was read. Returns false
// if the buffer was still empty when the timeout expired.
static bool log_buffer_try_read_timeout_millis(Log_Buffer *logbuf, Log_Message *output, int millis)
{
   // The deadline is fixed before taking the lock, so time spent waiting for
   // the mutex counts against the timeout.
   auto time_point = std::chrono::steady_clock::now() + std::chrono::milliseconds(millis);

   PMQ_PROFILED_UNIQUE_LOCK(lock, logbuf->mutex);

   while (logbuf->writepos == logbuf->readpos)
   {
      // A timeout alone does not prove the buffer is empty: a writer may have
      // added a message before we got the lock back. Give up only if the
      // buffer is still empty; otherwise the loop condition ends the wait.
      if (logbuf->readable.wait_until(lock, time_point) == std::cv_status::timeout
            && logbuf->writepos == logbuf->readpos)
         return false;
   }

   _log_buffer_read(logbuf, output);
   return true;
}
|
||||
|
||||
|
||||
|
||||
// The single log buffer of this translation unit. The pmq_*_log_message()
// functions in this file all operate on it.
static Log_Buffer global_log_buffer;
|
||||
|
||||
// Appends a copy of *input to the global log buffer.
// Blocks while the buffer is full.
void pmq_write_log_message(Log_Message const *input)
{
   Log_Buffer *const buffer = &global_log_buffer;
   log_buffer_write(buffer, input);
}
|
||||
|
||||
// Removes the oldest message from the global log buffer into *output.
// Blocks until a message is available.
void pmq_read_log_message(Log_Message *output)
{
   Log_Buffer *const buffer = &global_log_buffer;
   log_buffer_read(buffer, output);
}
|
||||
|
||||
// Non-blocking read from the global log buffer. Returns true and fills
// *output if a message was available, false otherwise.
bool pmq_try_read_log_message(Log_Message *output)
{
   Log_Buffer *const buffer = &global_log_buffer;
   return log_buffer_try_read(buffer, output);
}
|
||||
|
||||
// Reads from the global log buffer, waiting up to `millis` milliseconds for a
// message. Returns true and fills *output on success, false on timeout.
bool pmq_try_read_log_message_timeout_millis(Log_Message *output, int millis)
{
   Log_Buffer *const buffer = &global_log_buffer;
   return log_buffer_try_read_timeout_millis(buffer, output, millis);
}
|
||||
|
||||
|
||||
// Appends printf-style formatted text to *msg, starting at msg->size.
//
// The result is always NUL-terminated and msg->size never exceeds
// (sizeof msg->data - 1); output that does not fit is silently truncated.
// If vsnprintf() reports an error, the existing content is kept unchanged.
void log_msg_printfv(Log_Message *msg, const char *fmt, va_list ap)
{
   // Largest allowed msg->size: the last byte is reserved for the NUL.
   const size_t max_size = sizeof msg->data - 1;

   // Defensive: an out-of-range size would make the remaining-space
   // computation below wrap around.
   if (msg->size > max_size)
      msg->size = max_size;

   // vsnprintf()'s size argument includes the terminating NUL, so pass all of
   // the remaining buffer. (Passing one byte less made a truncated message end
   // with a NUL that was counted as part of msg->size.)
   int ret = vsnprintf(msg->data + msg->size, sizeof msg->data - msg->size, fmt, ap);

   if (ret < 0)
   {
      // Output error: don't let a negative value corrupt the size.
      msg->data[msg->size] = 0;
      return;
   }

   // On truncation ret is the length that would have been written; clamp.
   msg->size += (size_t) ret;
   if (msg->size > max_size)
      msg->size = max_size;
   msg->data[msg->size] = 0;
}
|
||||
|
||||
// Note: this is a method (implicit this pointer) so we use
|
||||
// __pmq_formatter(2, 3) instead of __pmq_formatter(1, 2).
|
||||
void __pmq_formatter(2, 3) log_msg_printf(Log_Message *msg, const char *fmt, ...) // NOLINT this is safe because of __pmq_formatter() annotation
|
||||
{
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
log_msg_printfv(msg, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
void pmq_msg_ofv(const PMQ_Msg_Options& opt, const char *fmt, va_list ap)
|
||||
{
|
||||
bool print_errno = (bool) (opt.flags & PMQ_MSG_OPT_ERRNO);
|
||||
uint32_t priority = opt.flags & PMQ_MSG_OPT_LVL_MASK;
|
||||
|
||||
Log_Message log_msg;
|
||||
log_msg.size = 0;
|
||||
|
||||
#if INTEGRATE_WITH_METADATA_SERVER
|
||||
|
||||
int metadata_priority = 0;
|
||||
|
||||
switch (priority)
|
||||
{
|
||||
case PMQ_MSG_OPT_LVL_DEBUG: metadata_priority = Log_DEBUG; break;
|
||||
case PMQ_MSG_OPT_LVL_INFO: metadata_priority = Log_NOTICE; break;
|
||||
case PMQ_MSG_OPT_LVL_WARN: metadata_priority = Log_WARNING; break;
|
||||
case PMQ_MSG_OPT_LVL_ERR: metadata_priority = Log_ERR; break;
|
||||
default: assert(0); // can't happen at least currently where log mask has 2 bits.
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Early return, avoiding most of the work if the message has less priority
|
||||
// than the log level.
|
||||
// TODO: we should have something like this for metadata server integration
|
||||
// too.
|
||||
if (PMQ_LOG_LEVEL > priority)
|
||||
return;
|
||||
|
||||
switch (priority)
|
||||
{
|
||||
case PMQ_MSG_OPT_LVL_DEBUG: log_msg_printf(&log_msg, "DEBUG: "); break;
|
||||
case PMQ_MSG_OPT_LVL_INFO: log_msg_printf(&log_msg, "INFO: "); break;
|
||||
case PMQ_MSG_OPT_LVL_WARN: log_msg_printf(&log_msg, "WARNING: "); break;
|
||||
case PMQ_MSG_OPT_LVL_ERR: log_msg_printf(&log_msg, "ERROR: "); break;
|
||||
default: assert(0); // can't happen at least currently where log mask has 2 bits.
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
log_msg_printfv(&log_msg, fmt, ap);
|
||||
|
||||
if (print_errno)
|
||||
{
|
||||
char errbuf[64];
|
||||
const char *errstr;
|
||||
|
||||
#if (_POSIX_C_SOURCE >= 200112L) && ! _GNU_SOURCE
|
||||
{
|
||||
// XSI compliant strerror_r()
|
||||
int ret = strerror_r(opt.errnum, errbuf, sizeof errbuf);
|
||||
if (ret == 0)
|
||||
errstr = errbuf;
|
||||
}
|
||||
#else
|
||||
{
|
||||
// GNU version of strerror_r()
|
||||
errstr = strerror_r(opt.errnum, errbuf, sizeof errbuf);
|
||||
}
|
||||
#endif
|
||||
if (! errstr)
|
||||
{
|
||||
snprintf(errbuf, sizeof errbuf, "(errno=%d)", opt.errnum);
|
||||
errstr = errbuf;
|
||||
}
|
||||
|
||||
log_msg_printf(&log_msg, ": %s", errstr);
|
||||
}
|
||||
|
||||
#if INTEGRATE_WITH_METADATA_SERVER
|
||||
// Integration into metadata server
|
||||
Logger *logger = Logger::getLogger();
|
||||
logger->log(LogTopic_EVENTLOGGER, metadata_priority, opt.loc.file, opt.loc.line, log_msg.data);
|
||||
#else
|
||||
|
||||
log_msg_printf(&log_msg, "\n");
|
||||
|
||||
//fwrite(log_msg.data, log_msg.size, 1, stderr);
|
||||
|
||||
pmq_write_log_message(&log_msg);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// Variadic front end for pmq_msg_ofv(): collects the arguments following fmt
// and forwards them together with opt.
void __pmq_formatter(2, 3) pmq_msg_of(const PMQ_Msg_Options& opt, const char *fmt, ...) // NOLINT this is safe because of use of __pmq_formatter() annotation
{
   va_list args;

   va_start(args, fmt);
   pmq_msg_ofv(opt, fmt, args);
   va_end(args);
}
|
||||
112
meta/source/pmq/pmq_logging.hpp
Normal file
112
meta/source/pmq/pmq_logging.hpp
Normal file
@@ -0,0 +1,112 @@
|
||||
#pragma once
|
||||
|
||||
#include "pmq_base.hpp"
|
||||
|
||||
// Bit flags for PMQ_Msg_Options::flags.
enum
{
   PMQ_MSG_OPT_DEFAULT = 0,

   // Append a description of the given system error code to the message.
   PMQ_MSG_OPT_ERRNO = (1 << 0),
   PMQ_MSG_HAS_SOURCE_LOC = (1 << 1),

   // The log level occupies a two-bit field (mask value 12).
   PMQ_MSG_OPT_LVL_MASK = (3 << 2),
   PMQ_MSG_OPT_LVL_DEBUG = (0 << 2),
   PMQ_MSG_OPT_LVL_INFO = (1 << 2),
   PMQ_MSG_OPT_LVL_WARN = (2 << 2),
   PMQ_MSG_OPT_LVL_ERR = (3 << 2),
};
|
||||
|
||||
// Source code location (file name and line number) of a log statement.
struct PMQ_Source_Loc
{
   // Source file name; the PMQ_SOURCE_LOC macro fills this from __FILE__.
   const char *file;

   // Line number; the PMQ_SOURCE_LOC macro fills this from __LINE__.
   uint32_t line;
};
|
||||
|
||||
struct PMQ_Msg_Options
|
||||
{
|
||||
PMQ_Source_Loc loc;
|
||||
uint32_t flags; // PMQ_MSG_OPT_
|
||||
int errnum;
|
||||
};
|
||||
|
||||
|
||||
// Logging functions / macros.
|
||||
//
|
||||
// The following functions are typically used in the client code.
|
||||
//
|
||||
// pmq_msg_f(fmt, ...): Submit a log message with default log level and format string + var-args
|
||||
// pmq_perr_f(fmt, ...): Submit a log message with error log level and format string + var-args
|
||||
// pmq_perr_ef(errno, fmt, ...): like pmq_perr_f() but also add text for given system error code ("errno")
|
||||
//
|
||||
// To explain all functions available here: they are made according to a pattern
|
||||
//
|
||||
// pmq_{LVL}_{MNEMONICS}
|
||||
//
|
||||
// LVL: logging level, possible options
|
||||
// - msg: Default level or use specified level ('l' mnemonic)
|
||||
// - debug: Debug level
|
||||
// - warn: Warning level
|
||||
// - perr: Error level ("print-error")
|
||||
//
|
||||
// MNEMONICS: combination of 1-letter chars
|
||||
// - l: means a logging level is specified (only available with 'msg' category)
|
||||
// - e: add a text for specified system error code ("errno")
|
||||
// - f: "format", like in the stdio function printf().
|
||||
// - v: in combination with f (so 'fv'), means the arguments come as a va_list, like in stdio function vfprintf().
|
||||
// - o: Use a single options struct holding level, errno explicitly, as well as source code location info.
|
||||
|
||||
|
||||
// Submit a log message described by opt (level, errno option, source
// location) with a printf-style format string and a va_list of arguments.
void pmq_msg_ofv(const PMQ_Msg_Options& opt, const char *fmt, va_list ap);
|
||||
|
||||
// Like pmq_msg_ofv(), but takes the format arguments as variadic arguments.
void __pmq_formatter(2, 3) pmq_msg_of(const PMQ_Msg_Options& opt, const char *fmt, ...);
|
||||
|
||||
// Expands to a PMQ_Source_Loc for the file and line where the macro is used.
#define PMQ_SOURCE_LOC ((PMQ_Source_Loc) { __FILE__, __LINE__ })

// Expands to a PMQ_Msg_Options whose first member is the current source
// location. Any arguments initialize the members that follow (flags, errnum).
#define PMQ_MSG_OPTIONS(...) (PMQ_Msg_Options { PMQ_SOURCE_LOC, ##__VA_ARGS__ })

// Explicit level, no errno text.
#define pmq_msg_lf(lvl, fmt, ...) \
   pmq_msg_of(PMQ_MSG_OPTIONS((lvl), 0), fmt, ##__VA_ARGS__)

// Explicit level, plus a description of system error code e.
#define pmq_msg_lef(lvl, e, fmt, ...) \
   pmq_msg_of(PMQ_MSG_OPTIONS(PMQ_MSG_OPT_ERRNO | (lvl), e), fmt, ##__VA_ARGS__)

// Info level.
#define pmq_msg_f(fmt, ...) \
   pmq_msg_lf(PMQ_MSG_OPT_LVL_INFO, fmt, ##__VA_ARGS__)

#define pmq_msg_ef(e, fmt, ...) \
   pmq_msg_lef(PMQ_MSG_OPT_LVL_INFO, (e), fmt, ##__VA_ARGS__)

// Debug level.
#define pmq_debug_f(fmt, ...) \
   pmq_msg_lf(PMQ_MSG_OPT_LVL_DEBUG, fmt, ##__VA_ARGS__)

#define pmq_debug_ef(e, fmt, ...) \
   pmq_msg_lef(PMQ_MSG_OPT_LVL_DEBUG, (e), fmt, ##__VA_ARGS__)

// Warning level.
#define pmq_warn_f(fmt, ...) \
   pmq_msg_lf(PMQ_MSG_OPT_LVL_WARN, fmt, ##__VA_ARGS__)

#define pmq_warn_ef(e, fmt, ...) \
   pmq_msg_lef(PMQ_MSG_OPT_LVL_WARN, (e), fmt, ##__VA_ARGS__)

// Error level.
#define pmq_perr_f(fmt, ...) \
   pmq_msg_lf(PMQ_MSG_OPT_LVL_ERR, fmt, ##__VA_ARGS__)

#define pmq_perr_ef(e, fmt, ...) \
   pmq_msg_lef(PMQ_MSG_OPT_LVL_ERR, (e), fmt, ##__VA_ARGS__)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Low-level Logging I/O interface
|
||||
|
||||
// One formatted log message in a fixed-size record.
struct Log_Message
{
   // Number of bytes of `data` that are in use.
   size_t size;

   // Message text. The array length is chosen so that it and `size` add up to
   // 256 bytes -- for simplicity.
   char data[256 - sizeof (size_t)];
};
|
||||
|
||||
// Append a copy of *input to the log buffer.
void pmq_write_log_message(Log_Message const *input);
|
||||
// Remove the oldest message from the log buffer and store it in *output.
void pmq_read_log_message(Log_Message *output);
|
||||
// Like pmq_read_log_message(), but wait at most `millis` milliseconds.
// Returns whether a message was stored in *output.
bool pmq_try_read_log_message_timeout_millis(Log_Message *output, int millis);
|
||||
// Like pmq_read_log_message(), but never wait.
// Returns whether a message was stored in *output.
bool pmq_try_read_log_message(Log_Message *output);
|
||||
212
meta/source/pmq/pmq_posix_io.hpp
Normal file
212
meta/source/pmq/pmq_posix_io.hpp
Normal file
@@ -0,0 +1,212 @@
|
||||
#pragma once
|
||||
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "pmq_logging.hpp"
|
||||
|
||||
/* Wrapper around open() to open directories to put this awkward code in a
|
||||
* central place. It is counter-intuitive but this is apparently how you're
|
||||
* supposed to open directories on Unix, both the reading and "writing" (i.e.
|
||||
* create, unlink, rename).
|
||||
* The O_DIRECTORY flag is optional but the O_RDONLY is not; opening with
|
||||
* O_RDWR | O_DIRECTORY fails with "is a directory" (weird!).
|
||||
*
|
||||
* Returns: fd to open directory or -1, in which case the errno variable must
|
||||
* be handled as usual.
|
||||
*/
|
||||
// Opens the directory at `path`. Returns the new fd, or -1 with errno set by
// open().
static inline int pmq_open_dir(const char *path)
{
   // Read-only access; O_DIRECTORY makes open() fail for non-directories.
   const int dir_flags = O_RDONLY | O_DIRECTORY;
   return open(path, dir_flags);
}
|
||||
|
||||
static inline int pmq_check_regular_file(int fd, const char *what_file)
|
||||
{
|
||||
struct stat st;
|
||||
|
||||
if (fstat(fd, &st) == -1)
|
||||
{
|
||||
pmq_perr_ef(errno, "Failed to fstat() the fd we opened");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (! S_ISREG(st.st_mode))
|
||||
{
|
||||
pmq_perr_f("We opened the file '%s' expecting a regular file but it's not",
|
||||
what_file);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Note: returns an fd >= 0 if successful.
|
||||
// On failure, -1 is returned and
|
||||
// - if the file failed to open, errno indicates why the file failed to open.
|
||||
// - if the file was opened successfully but then closed again because it was not
|
||||
// a regular file, errno is set to 0.
|
||||
static inline int pmq_openat_regular_existing(
|
||||
int basedir_fd, const char *relpath, int flags)
|
||||
{
|
||||
// only access mode may be specified -- no other flags
|
||||
// In particular, O_CREAT would break the logic.
|
||||
assert(flags == O_RDWR || flags == O_RDONLY || flags == O_WRONLY);
|
||||
|
||||
int fd = openat(basedir_fd, relpath, flags, 0);
|
||||
|
||||
if (fd == -1)
|
||||
{
|
||||
int e = errno;
|
||||
pmq_perr_ef(errno, "Failed to openat() existing file='%s', flags=%x",
|
||||
relpath, flags);
|
||||
errno = e;
|
||||
return fd;
|
||||
}
|
||||
|
||||
/* The case where fd refers to something other than a regular file _may_
|
||||
* have been caught by the kernel already above. For example, opening a
|
||||
* directory using O_RDWR will fail. On the other hand, opening a directory
|
||||
* using O_RDONLY will succeed.
|
||||
* In any case, doing an explicit check here.
|
||||
*/
|
||||
if (! pmq_check_regular_file(fd, relpath))
|
||||
{
|
||||
close(fd);
|
||||
errno = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
static inline int pmq_openat_regular_create(
|
||||
int basedir_fd, const char *relpath, int flags, mode_t mode)
|
||||
{
|
||||
// only access mode may be specified -- no other flags
|
||||
assert(flags == O_RDWR || flags == O_RDONLY || flags == O_WRONLY);
|
||||
|
||||
// But this func makes sure that the creation-flags are specified
|
||||
flags |= O_CREAT | O_EXCL;
|
||||
|
||||
int fd = openat(basedir_fd, relpath, flags, mode);
|
||||
|
||||
if (fd == -1)
|
||||
{
|
||||
int e = errno;
|
||||
pmq_perr_ef(errno, "Failed to openat() file='%s', flags=%x, mode=%o",
|
||||
relpath, flags, (unsigned) mode);
|
||||
errno = e;
|
||||
return fd;
|
||||
}
|
||||
|
||||
// all necessarily error handling should be done by the OS. (note O_EXCL)
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
// Asserts that `size` survives a round trip through ssize_t, i.e. that it is
// representable as a ssize_t too.
__pmq_artificial_func
void assert_sane_size(size_t size)
{
   // It is implementation defined how syscalls like write() handle I/O sizes
   // larger than SSIZE_MAX. So better don't even try to.
   const ssize_t as_signed = (ssize_t) size;
   const size_t round_trip = (size_t) as_signed;

   assert(round_trip == size);
}
|
||||
|
||||
static inline bool pmq_write_all(int fd, Untyped_Slice slice, const char *what)
|
||||
{
|
||||
assert_sane_size(slice.size());
|
||||
|
||||
while (slice.size())
|
||||
{
|
||||
ssize_t nw = write(fd, slice.data(), slice.size());
|
||||
|
||||
if (nw == -1)
|
||||
{
|
||||
int e = errno;
|
||||
pmq_perr_ef(errno, "Failed to write %zu bytes to %s",
|
||||
slice.size(), what);
|
||||
errno = e;
|
||||
return false;
|
||||
}
|
||||
|
||||
slice = slice.offset_bytes((size_t) nw);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool pmq_pwrite_all(int fd, Untyped_Slice slice, off_t offset, const char *what)
|
||||
{
|
||||
assert_sane_size(slice.size());
|
||||
|
||||
while (slice.size())
|
||||
{
|
||||
ssize_t nw = pwrite(fd, slice.data(), slice.size(), offset);
|
||||
|
||||
if (nw == -1)
|
||||
{
|
||||
int e = errno;
|
||||
pmq_perr_ef(errno, "Failed to pwrite() %zu bytes at offset %jd to %s",
|
||||
slice.size(), (intmax_t) offset, what);
|
||||
errno = e;
|
||||
return false;
|
||||
}
|
||||
|
||||
slice = slice.offset_bytes((size_t) nw);
|
||||
offset += (size_t) nw;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool pmq_read_all(int fd, Untyped_Slice slice, const char *what)
|
||||
{
|
||||
assert_sane_size(slice.size());
|
||||
|
||||
while (slice.size())
|
||||
{
|
||||
ssize_t nw = read(fd, slice.data(), slice.size());
|
||||
|
||||
if (nw == -1)
|
||||
{
|
||||
int e = errno;
|
||||
pmq_perr_ef(errno, "Failed to read %zu bytes from %s",
|
||||
slice.size(), what);
|
||||
errno = e;
|
||||
return false;
|
||||
}
|
||||
|
||||
slice = slice.offset_bytes((size_t) nw);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool pmq_pread_all(int fd, Untyped_Slice slice, off_t offset, const char *what)
|
||||
{
|
||||
assert_sane_size(slice.size());
|
||||
|
||||
while (slice.size())
|
||||
{
|
||||
ssize_t nw = pread(fd, slice.data(), slice.size(), offset);
|
||||
|
||||
if (nw == -1)
|
||||
{
|
||||
int e = errno;
|
||||
pmq_perr_ef(errno, "Failed to pread() %zu bytes at offset %jd to %s",
|
||||
slice.size(), (intmax_t) offset, what);
|
||||
errno = e;
|
||||
return false;
|
||||
}
|
||||
|
||||
slice = slice.offset_bytes((size_t) nw);
|
||||
offset += (size_t) nw;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
46
meta/source/pmq/pmq_profiling.hpp
Normal file
46
meta/source/pmq/pmq_profiling.hpp
Normal file
@@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
|
||||
// Profiling hooks for PMQ.
//
// When PMQ_WITH_PROFILING evaluates to non-zero, the macros below map to the
// Tracy instrumentation macros. Otherwise they expand to nothing or to the
// plain standard-library mutex / condition variable / lock types, so code
// using them builds the same way with and without profiling.

#if PMQ_WITH_PROFILING

// NOTE: this requires building with matching headers in the include-path for
// tracy. Tracy is a "frame profiler": https://github.com/wolfpld/tracy
// If PMQ is built as part of a bigger project (e.g. BeeGFS metadata server)
// building with the proper settings may not be supported (yet).
// A build setup that supports tracy currently exists as part of the flex-docs
// repository (ask the BeeGFS team).

# include <Tracy.hpp>

# define PMQ_PROFILING_CTX FrameMark
# define PMQ_PROFILED_SCOPE(name) ZoneScopedN(name)
# define PMQ_PROFILED_FUNCTION ZoneScoped
# define PMQ_PROFILED_MUTEX(name) TracyLockable(std::mutex, name)
// The tracked mutex is not a plain std::mutex, hence condition_variable_any.
# define PMQ_PROFILED_CONDVAR(name) std::condition_variable_any name
// The lock macros expand to several statements (they declare the lock object
// `name` plus a helper reference), so use them as a statement of their own.
# define PMQ_PROFILED_LOCK(name, themutex) \
      auto& pmq_lockref_##name(themutex); \
      std::lock_guard<LockableBase(std::mutex)> name(pmq_lockref_##name); \
      LockMark(pmq_lockref_##name)
# define PMQ_PROFILED_UNIQUE_LOCK(name, themutex) \
      auto& pmq_lockref_##name(themutex); \
      std::unique_lock<LockableBase(std::mutex)> name(pmq_lockref_##name); \
      LockMark(pmq_lockref_##name)

#else

# define PMQ_PROFILING_CTX
# define PMQ_PROFILED_SCOPE(name)
# define PMQ_PROFILED_FUNCTION
# define PMQ_PROFILED_MUTEX(name) std::mutex name
# define PMQ_PROFILED_CONDVAR(name) std::condition_variable name
# define PMQ_PROFILED_LOCK(name, themutex) \
      std::lock_guard<std::mutex> name(themutex)
# define PMQ_PROFILED_UNIQUE_LOCK(name, themutex) \
      std::unique_lock<std::mutex> name(themutex)

#endif
|
||||
Reference in New Issue
Block a user