New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

2500
meta/source/pmq/pmq.cpp Normal file

File diff suppressed because it is too large Load Diff

243
meta/source/pmq/pmq.hpp Normal file
View File

@@ -0,0 +1,243 @@
#pragma once
#include <stdint.h> // uint64_t etc.
#include <stddef.h> // size_t
// Statistics counters maintained by the enqueuing (producer) side.
struct PMQ_Enqueuer_Stats
{
    // how many times was the buffer filled up (the flusher couldn't keep up)?
    uint64_t buffer_full_count;
    uint64_t total_messages_enqueued; // messages accepted so far
    uint64_t total_bytes_enqueued;    // payload bytes accepted so far
};
// Statistics counters maintained by the persisting (flusher/WAL) side.
struct PMQ_Persister_Stats
{
    uint64_t num_async_flushes; // calls to pmq_sync()
    uint64_t wakeups;           // persister wakeups -- exact trigger not visible here; confirm in pmq.cpp
    uint64_t fsync_calls;       // presumably fsync() invocations -- confirm in pmq.cpp
    uint64_t wal_flushes;       // number of write-ahead-log flushes
    uint64_t wal_flush_bytes;   // total bytes written by WAL flushes
};
// Aggregate of all queue statistics; filled in by pmq_get_stats().
struct PMQ_Stats
{
    PMQ_Enqueuer_Stats enqueuer;
    PMQ_Persister_Stats persister;
};
struct PMQ;
/* Parameters for creating a new queue object (see pmq_create()).
 * If basedir_path exists, try to load existing queue data structures from disk.
 * Otherwise, create the directory and initialize a new queue there.
 * A queue uses approximately the number of bytes that were specified in
 * create_size at the time of creation. (Something like 2 GiB is not
 * unreasonable).
 */
struct PMQ_Init_Params
{
    const char *basedir_path; // queue directory: loaded if it exists, created and initialized otherwise
    uint64_t create_size;     // approximate on-disk size in bytes, fixed at creation time
};
PMQ *pmq_create(const PMQ_Init_Params *params);
/* Destroy queue object. This will first flush the remaining buffered messages to disk.
*/
void pmq_destroy(PMQ *q);
bool pmq_enqueue_msg(PMQ *q, const void *data, size_t size);
bool pmq_sync(PMQ *q);
void pmq_get_stats(PMQ *q, PMQ_Stats *stats);
/* Information about persisted data */
// (CSN = chunk sequence number, MSN = message sequence number)
struct PMQ_Persist_Info
{
    uint64_t cks_discard_csn; // oldest CSN in the chunk store (next chunk to be discarded)
    uint64_t cks_msn; // next MSN to hit the chunk store
    uint64_t wal_msn; // next MSN to hit the WAL
};
PMQ_Persist_Info pmq_get_persist_info(PMQ *q);
/*
* Get an updated value of the byte range of the underlying data store.
* The returned range will be chunk-aligned, but what size chunks are is
* currently not exposed in this API.
*/
/* */
/* Result codes for read operations on a PMQ reader. */
enum PMQ_Read_Result
{
    // The message was successfully read back.
    PMQ_Read_Result_Success,
    // The caller's buffer has insufficient size. (The required size is
    // still reported back.)
    PMQ_Read_Result_Buffer_Too_Small,
    // The requested data is at the current end of the storage area/window --
    // i.e. it is the next data that will be written. Try again later.
    // TODO: We might want to introduce mechanisms to block until new data
    // arrives at every level. Currently this has to be implemented in the
    // integration code.
    PMQ_Read_Result_EOF,
    // The requested data is not present. Maybe it was discarded
    // concurrently? It is safe to re-position the cursor and retry.
    PMQ_Read_Result_Out_Of_Bounds,
    // An error was detected by the storage layer.
    PMQ_Read_Result_IO_Error,
    // The data read back from the storage layer failed validation.
    PMQ_Read_Result_Integrity_Error,
};

/* Map a PMQ_Read_Result to a short human-readable name (for logging). */
static inline const char *pmq_read_result_string(PMQ_Read_Result readres)
{
    // Table indexed by enumerator value. The enumerators above are
    // implicitly numbered 0..N-1 in declaration order.
    static const char *const names[] = {
        "Success",
        "Buffer_Too_Small",
        "EOF",
        "Out_Of_Bounds",
        "IO_Error",
        "Integrity_Error",
    };
    size_t index = (size_t) readres;
    if (index >= sizeof names / sizeof names[0])
        return "(invalid value)";
    return names[index];
}
struct PMQ_Reader;
PMQ_Reader *pmq_reader_create(PMQ *q);
void pmq_reader_destroy(PMQ_Reader *reader);
PMQ *pmq_reader_get_pmq(PMQ_Reader *reader);
/* Position cursor at the next incoming message -- or, in other words, at the
* current write end of the queue. */
PMQ_Read_Result pmq_reader_seek_to_current(PMQ_Reader *reader);
/* Position cursor at the oldest message (the first message in the chunk
* cks_discard). Note that this is rarely a good idea since this message is
* likely to be discarded concurrently, so it runs risk of losing sync
* immediately or shortly. */
PMQ_Read_Result pmq_reader_seek_to_oldest(PMQ_Reader *reader);
/* Position cursor to given msn. MSNs cannot be directly addressed. The
* implementation will have to load multiple chunks to find it.
* This also means that the call can fail -- I/O errors etc. can be returned.
*/
PMQ_Read_Result pmq_reader_seek_to_msg(PMQ_Reader *reader, uint64_t msn);
/* Read the current message and advance. On success, returns the size of the
* message that was read in @out_size and advances to the next message
* internally.
*/
PMQ_Read_Result pmq_read_msg(PMQ_Reader *reader,
void *data, size_t size, size_t *out_size);
uint64_t pmq_reader_get_current_msn(PMQ_Reader *reader);
/* Attempt to find the MSN of the oldest persisted message.
*
* Note that the MSN that ends up being returned might already be discarded
* once the caller tries to read that message. So calling this function might
* not be a good idea.
*
 * Another difficulty, at the implementation level, is that the implementation
* needs to read the oldest chunk to know the oldest MSN in that chunk. But the
* oldest chunk may be discarded concurrently, so reading it might fail. In
* case of a concurrent discard, the implementation will update its
* oldest-chunk information and then skip ahead some chunks, trying to read a
* slightly newer chunk. This makes the operation more likely to succeed next
* time. This continues until either a chunk was read successfully, or we run
* out of persisted chunks. In the latter case, the implementation returns the
* current "next" MSN. The PMQ always keeps track of this information, so we
* can know it without reading a chunk from disk.
*/
uint64_t pmq_reader_find_old_msn(PMQ_Reader *reader);
/* Equivalent to pmq_get_persist_info(pmq_reader_get_pmq(reader)); */
PMQ_Persist_Info pmq_reader_get_persist_info(PMQ_Reader *reader);
/* pmq_reader_eof() -- Inexpensive check if there are messages available
* currently.
* This allows a concurrent reader procedure synchronize with writers without
* having to actually read a message while holding a lock -- which could block
* writers for a long time if we have to do actual I/O.
*/
bool pmq_reader_eof(PMQ_Reader *reader);
// C++ RAII wrappers
// unique_ptr is maybe not precisely what we're looking for. So we're using some boilerplate instead.
//#include <memory>
//using PMQ_Handle = std::unique_ptr<PMQ, decltype(pmq_destroy)>;
//using PMQ_Reader_Handle = std::unique_ptr<PMQ_Reader, decltype(pmq_reader_destroy)>;
// RAII holder for a C-style handle (PMQ * / PMQ_Reader *).
//
// Owns a single pointer and calls Deleter() on it when the wrapper is
// destroyed, or when a new pointer is assigned. The wrapper is move-only:
// copying would make two owners run Deleter on the same pointer.
template<typename T, void Deleter(T *)>
class PMQ_Handle_Wrapper
{
    T *m_ptr = nullptr;
public:
    // Raw pointer access (may be null).
    T *get() const
    {
        return m_ptr;
    }
    // Release the held object (if any) by calling Deleter on it.
    void drop()
    {
        if (m_ptr)
        {
            Deleter(m_ptr);
            m_ptr = nullptr;
        }
    }
    operator T *() const // automatic implicit cast to T *
    {
        return m_ptr;
    }
    explicit operator bool() const
    {
        return m_ptr != nullptr;
    }
    // Move assignment: drop our current object and steal the other's pointer.
    // Implemented without std::swap so this header does not depend on
    // <utility> being transitively included (only stdint.h/stddef.h are
    // included above).
    void operator=(PMQ_Handle_Wrapper&& other)
    {
        if (this != &other)
        {
            drop();
            m_ptr = other.m_ptr;
            other.m_ptr = nullptr;
        }
    }
    // Take ownership of a raw pointer, dropping any previously held object.
    void operator=(T *ptr)
    {
        drop();
        m_ptr = ptr;
    }
    // Copying is forbidden: two wrappers owning the same pointer would both
    // run Deleter on it. (The copy constructor was already implicitly
    // deleted by the user-declared move assignment; spelling it out makes
    // the intent explicit.)
    PMQ_Handle_Wrapper(PMQ_Handle_Wrapper const& other) = delete;
    void operator=(PMQ_Handle_Wrapper const& other) = delete;
    // Move constructor (previously missing): without it the wrapper could
    // not be move-constructed or returned by value even though move
    // assignment existed.
    PMQ_Handle_Wrapper(PMQ_Handle_Wrapper&& other)
        : m_ptr(other.m_ptr)
    {
        other.m_ptr = nullptr;
    }
    explicit PMQ_Handle_Wrapper(T *ptr = nullptr)
        : m_ptr(ptr)
    {
    }
    ~PMQ_Handle_Wrapper()
    {
        drop();
    }
};
using PMQ_Handle = PMQ_Handle_Wrapper<PMQ, pmq_destroy>;
using PMQ_Reader_Handle = PMQ_Handle_Wrapper<PMQ_Reader, pmq_reader_destroy>;

View File

@@ -0,0 +1,350 @@
#pragma once
#include <cassert>
#include <errno.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdint.h>
#include <cstdlib>
#include <cstring>
#include <unistd.h>
#include <new>
// macro to align variables to cache line size
// There is C++ standardized value of std::hardware_destructive_interference_size.
// However that currently produces a warning, probably because of concerns about ABI stability.
// So instead I just hardcode a cache line size of 64 bytes for now.
// The worst that could happen would be bad performance.
//#define __pmq_cache_aligned alignas(std::hardware_destructive_interference_size)
#define __pmq_cache_aligned alignas(64)
// These #define's work for GCC and possibly other compilers. To guarantee
// that these definitions are active wherever they could potentially work, I
// will define them unconditionally for now, instead of guarding them with
// #ifdef __GNUC__.
// TODO: try on more compilers and improve compatibility logic!
#if PMQ_WITH_PROFILING
// noinline keeps profiled functions visible as distinct frames to profilers.
#define __pmq_profiled __attribute__((noinline)) // could consider attribute "noipa" instead of "noinline"
#else
// Profiling disabled: __pmq_profiled expands to nothing.
#define __pmq_profiled
#endif
// "artificial" is used for small inlined wrapper methods, such as operator[].
// In theory (and to some extent in practice) the effect should be that the
// code that gets inlined to a call site gets attributed to the _call site_
// instead of to the definition site of the inlined function -- reducing the
// effect of jumping around like wild between files when debugging.
#define __pmq_artificial_method inline __attribute__((always_inline, artificial))
#define __pmq_artificial_func static inline __attribute__((always_inline, artificial))
// Attribute used for logging functions and other printf-style functions. If
// these functions are properly annotated, the compiler can check matching
// arguments in usage places.
// fmt_index is the 1-based position of the format-string parameter;
// first_arg_index is the 1-based position of the first variadic argument.
#define __pmq_formatter(fmt_index, first_arg_index) \
__attribute__((format(printf, (fmt_index), (first_arg_index))))
// treat format warnings as errors for the PMQ
// This could be a build system flag but for now I want the change just for
// this module in the larger system
#pragma GCC diagnostic error "-Wformat"
#ifdef NDEBUG
// Release builds: pmq_assert compiles to nothing.
#define pmq_assert(expr)
#else
// Assertion-failure handler: delay briefly, then abort via glibc's
// __assert_fail (the routine behind the standard assert() macro).
static inline void __pmq_assert_fail(const char *expr, const char *file, int line, const char *func)
{
    // this hopefully gives the logger a chance to save the logs.
    // If there was time, we should probably implement the logger in a separate component,
    // communicating using a shared memory mapping.
    sleep(3);
    __assert_fail(expr, file, line, func);
}
// Debug builds: like assert(), but routed through __pmq_assert_fail above.
#define pmq_assert(expr) do { if (! (expr)) { __pmq_assert_fail(#expr, __FILE__, __LINE__, __func__); } } while (0)
#endif
// Debug-check that ptr is aligned to a multiple of `size` bytes.
__pmq_artificial_func
void __pmq_assert_aligned(const void *ptr, size_t size)
{
    uintptr_t address = (uintptr_t) ptr;
    assert(address % size == 0);
}
// Tell the optimizer that ptr is aligned to `size` bytes (checked with an
// assertion in debug builds) and return the same pointer, cast away const.
// The aligned(size) attribute on the return type is a GCC extension.
template<size_t size, typename T>
__pmq_artificial_func
T __attribute__((aligned(size))) *__pmq_assume_aligned(const T *ptr)
{
    __pmq_assert_aligned(ptr, size);
    return (T *) __builtin_assume_aligned(ptr, size);
}
// True iff value has exactly one bit set. value must be non-zero
// (zero would otherwise be misreported as a power of two).
static inline bool pmq_is_power_of_2(uint64_t value)
{
    assert(value != 0);
    // A power of two shares no bits with its predecessor.
    return ! (value & (value - 1));
}
// For a power-of-2 value, return the mask covering all lower bits (value - 1).
static inline uint64_t pmq_mask_power_of_2(uint64_t value)
{
    uint64_t mask = value - 1;
    assert(value != 0);
    assert((value & mask) == 0); // must be a power of 2
    return mask;
}
// Byte-size helpers: a count in the given binary unit, converted to bytes.
static inline constexpr uint64_t PMQ_Kilobytes(uint64_t n) { return n << 10; }
static inline constexpr uint64_t PMQ_Megabytes(uint64_t n) { return n << 20; }
static inline constexpr uint64_t PMQ_Gigabytes(uint64_t n) { return n << 30; }
static inline constexpr uint64_t PMQ_Terabytes(uint64_t n) { return n << 40; }
static inline constexpr uint64_t PMQ_Petabytes(uint64_t n) { return n << 50; }
/* Untyped slice: a bare (pointer, byte-count) view of a memory range.
 * Mainly used for slice-copy operations, covering both memory and disk I/O.
 * It saves boilerplate and is a little safer than passing raw pointer/size
 * pairs. Carries no ownership or lifetime semantics.
 *
 * A standard C++ type might replace this eventually; meanwhile the
 * __pmq_artificial_method annotation improves the debugging experience.
 */
class Untyped_Slice
{
    void *m_base;
    size_t m_num_bytes;
public:
    __pmq_artificial_method void *data() const { return m_base; }
    __pmq_artificial_method size_t size() const { return m_num_bytes; }
    // View with the first `offset` bytes dropped.
    __pmq_artificial_method
    Untyped_Slice offset_bytes(size_t offset) const
    {
        assert(offset <= m_num_bytes);
        return Untyped_Slice((char *) m_base + offset, m_num_bytes - offset);
    }
    // Prefix view restricted to the first `size` bytes.
    __pmq_artificial_method
    Untyped_Slice limit_size_bytes(size_t size) const
    {
        assert(size <= m_num_bytes);
        return Untyped_Slice(m_base, size);
    }
    // View of `size` bytes starting at `offset`.
    __pmq_artificial_method
    Untyped_Slice sub_slice_bytes(size_t offset, size_t size) const
    {
        return offset_bytes(offset).limit_size_bytes(size);
    }
    __pmq_artificial_method
    Untyped_Slice() : m_base(nullptr), m_num_bytes(0)
    {
    }
    __pmq_artificial_method
    Untyped_Slice(void *data, size_t size) : m_base(data), m_num_bytes(size)
    {
    }
};
// Fill the whole slice with zero bytes.
__pmq_artificial_func
void zero_out_slice(Untyped_Slice slice)
{
    memset(slice.data(), 0, slice.size());
}
// Copy between two slices of exactly equal size.
__pmq_artificial_func
void copy_slice(Untyped_Slice dst, Untyped_Slice src)
{
    size_t num_bytes = src.size();
    assert(dst.size() == num_bytes);
    memcpy(dst.data(), src.data(), num_bytes);
}
// Copy size_bytes bytes from the front of src to the front of dst.
// Both slices must be at least size_bytes long.
__pmq_artificial_func
void copy_slice_bytes(Untyped_Slice dst, Untyped_Slice src, size_t size_bytes)
{
    assert(dst.size() >= size_bytes);
    assert(src.size() >= size_bytes);
    memcpy(dst.data(), src.data(), size_bytes);
}
// Copy `size` bytes from `data` into the front of `slice` (must fit).
__pmq_artificial_func
void copy_to_slice(Untyped_Slice slice, const void *data, size_t size)
{
    assert(size <= slice.size());
    memcpy(slice.data(), data, size);
}
// Copy `size` bytes out of the front of `slice` into `data` (slice must be
// at least that long).
__pmq_artificial_func
void copy_from_slice(void *data, Untyped_Slice slice, size_t size)
{
    assert(size <= slice.size());
    memcpy(data, slice.data(), size);
}
/*
 * Typed slice: a non-owning view of `count()` elements of type T.
 *
 * Note, we should check if we can replace this using std::span (C++20).
 */
template<typename T>
class Slice
{
    T *m_elems;
    size_t m_num;
public:
    __pmq_artificial_method
    T *data() const { return m_elems; }
    __pmq_artificial_method
    size_t count() const { return m_num; }
    // Total size of the viewed elements, in bytes.
    __pmq_artificial_method
    size_t size_in_bytes() const { return m_num * sizeof (T); }
    // Bounds-checked read, by value.
    __pmq_artificial_method
    T get(size_t index) const
    {
        assert(index < m_num);
        return m_elems[index];
    }
    // Bounds-checked element reference.
    __pmq_artificial_method
    T& at(size_t index)
    {
        assert(index < m_num);
        return m_elems[index];
    }
    __pmq_artificial_method
    T const& at(size_t index) const
    {
        assert(index < m_num);
        return m_elems[index];
    }
    // Reinterpret as an untyped byte slice over the same memory.
    __pmq_artificial_method
    Untyped_Slice untyped() const
    {
        return Untyped_Slice(m_elems, m_num * sizeof (T));
    }
    // Suffix sub-slice starting at start_index.
    __pmq_artificial_method
    Slice<T> slice_from(size_t start_index)
    {
        assert(start_index <= m_num);
        return Slice<T>(m_elems + start_index, m_num - start_index);
    }
    // Prefix sub-slice of the first `count` elements.
    __pmq_artificial_method
    Slice<T> slice_to(size_t count)
    {
        assert(count <= m_num);
        return Slice<T>(m_elems, count);
    }
    // Sub-slice of `count` elements starting at start_index.
    __pmq_artificial_method
    Slice<T> sub_slice(size_t start_index, size_t count)
    {
        return slice_from(start_index).slice_to(count);
    }
    __pmq_artificial_method
    Slice() : m_elems(nullptr), m_num(0)
    {
    }
    __pmq_artificial_method
    Slice(T *data, size_t count) : m_elems(data), m_num(count)
    {
    }
};
// Copy `size` raw bytes from `data` into the slice's storage (must fit).
template<typename T>
__pmq_artificial_func
void copy_to_slice(Slice<T> slice, const void *data, size_t size)
{
    assert(size <= slice.size_in_bytes());
    memcpy(slice.data(), data, size);
}
// Copy `size` raw bytes out of the slice's storage into `data`.
template<typename T>
__pmq_artificial_func
void copy_from_slice(void *data, Slice<T> slice, size_t size)
{
    assert(size <= slice.size_in_bytes());
    memcpy(data, slice.data(), size);
}
// Wrapper around a bare pointer to a single object. Semantics are the same
// as for a pointer, except that indexing is not provided: the point of this
// class is to make clear that it refers to one object, never an array.
// Unlike a C++ reference (T& value), there are no surprises: value syntax,
// pointer semantics.
template<typename T>
class Pointer
{
    T *m_ptr;
public:
    __pmq_artificial_method
    T *ptr() const { return m_ptr; }
    __pmq_artificial_method
    const T *const_ptr() const { return m_ptr; }
    // Const-qualified view of the same pointee.
    __pmq_artificial_method
    Pointer<const T> as_const() const { return Pointer<const T>(m_ptr); }
    __pmq_artificial_method
    T *operator->() { return m_ptr; }
    // Construction requires a non-null pointer (checked in debug builds).
    __pmq_artificial_method
    Pointer(T *ptr) : m_ptr(ptr)
    {
        assert(ptr);
    }
};

View File

@@ -0,0 +1,851 @@
#pragma once
#include <new> // std::bad_alloc
#include "pmq_base.hpp"
#include "pmq_logging.hpp"
#include "pmq_posix_io.hpp"
#include "pmq_profiling.hpp"
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <dirent.h>
//
// Simple allocating slice class with delayed allocation.
//
// Why don't I just use std::vector or similar? After all, std::vector is a
// well known standard solution that allocates a contiguous buffer of memory.
//
// I understand this concern, but I've written several simple classes anyway.
// Let me try and defend this case of "NIH". (It may or may not convince the
// reader).
//
// This code is much more straightforward and simple compared to STL headers.
// It is basically "new" and "delete" wrapped in a simple package together with
// operator[] and a way to get a slice to the memory without attached lifetime
// semantics.
//
// Most of the STL classes try to be very generic solutions applicable in a
// wide variety of use cases. While using with standardized solutions has the
// advantages of familiarity, this flexibility and wide applicability comes
// with a complexity cost that brings a disadvantage to anyone working with the
// codebase.
//
// Beyond having a fill level separate from allocation size (size() vs
// capacity()), std::vector has all sorts of methods and functionality to
// support pushing, popping, emplacing, iterators, constructors, destructors,
// and so on. It is highly flexible, which shows whenever an actual
// instanciated vector type is printed on the terminal, including template
// allocator parameter amongst other things.
//
// All this is ill-fitting for our simple use case. For a queue we just need a
// few preallocated buffers. Just for convenience and to get a little safety,
// the Alloc_Slice wrapper class was created -- so we can do bounds checking
// and get to automatically deallocate the buffers in the destructor.
//
// The size() field and associated semantics that come with std::vector are
// baggage that we can't make use of (we have multiple cursors that wrap around
// our buffers in circular fashion). These semantics are not just available,
// but are understood by programmers as how std::vector gets used.
//
// From a mere functionality standpoint this shouldn't be an issue -- We could
// make sure that we call .resize(N) only once in the beginning and never call
// e.g. push_back(), emplace_back(), reserve(), or similar. This way we'd
// essentially be considering the size() as a constant i.e. ignore it.
//
// However, again, this usage of the type is not guaranteed. The sight of a
// std::vector normally suggests pushing (maybe popping), resizing and
// reserving, buffer reallocation, pointer/iterator invalidation, and runtime
// exceptions.
//
// With the Alloc_Slice class on the other hand, there is no reallocation and
// consequently no iterator invalidation. Exceptions might or might not happen
// depending on compile settings -- but only at construction time, i.e. program
// startup. Because no reallocations are possible, no pointer invalidation /
// iterator invalidation is possible.
//
// Compared to std::vector and other STL headers, significantly less header
// code gets included, so the code compiles quicker. How much quicker? In a
// simple test with a single file, adding any of vector, string, map etc.
// added around 100ms of compilation time (each). I believe I've seen much worse,
// but just multiply 100-400ms by the number of files in a large project and
// there may be a good argument for avoiding to include STL headers based on
// build time. (TODO: refer to example program).
//
// In fairness, this problem may be partially solved with precompiled headers,
// but those come with some issues too. (build setup, pollution, still have to
// compile on each rebuild or precompiled header change).
//
// With the Alloc_Slice class, methods like operator[] have been marked as
// "artificial", meaning it's easier to debug code without jumping all over the
// place. With std::vector and similar classes, I believe there is no way, or
// no standardized way, to build such that we don't jump around files like wild
// when debugging.
//
// If these arguments haven't been convincing, I'll end it now anyway -- the
// text is already much bigger than the actual code.
// Simple allocating slice class with delayed allocation. Owns a heap array
// of fixed capacity; see the discussion above for why this is preferred over
// std::vector here.
template<typename T>
class Alloc_Slice
{
    T *m_ptr = nullptr;
    size_t m_capacity = 0;
public:
    // Unchecked element access.
    __pmq_artificial_method
    T& operator[](size_t i) const
    {
        return m_ptr[i];
    }
    __pmq_artificial_method
    T *data() const
    {
        return m_ptr;
    }
    __pmq_artificial_method
    size_t capacity() const
    {
        return m_capacity;
    }
    // Non-owning typed view of the whole buffer.
    __pmq_artificial_method
    Slice<T> slice() const
    {
        return Slice<T>(m_ptr, m_capacity);
    }
    __pmq_artificial_method
    Untyped_Slice untyped_slice() const
    {
        return slice().untyped();
    }
    // One-shot allocation; must not already be allocated.
    void allocate(size_t capacity)
    {
        assert(! m_ptr);
        m_ptr = new T[capacity];
        m_capacity = capacity;
    }
    // Copying is deleted: previously the implicit copy operations were
    // available, so copying an Alloc_Slice made two owners of the same
    // buffer and a double delete[] in the destructors.
    Alloc_Slice(Alloc_Slice const&) = delete;
    Alloc_Slice& operator=(Alloc_Slice const&) = delete;
    // Deleting the copy constructor suppresses the implicit default
    // constructor, so restore it explicitly.
    Alloc_Slice() = default;
    ~Alloc_Slice()
    {
        delete[] m_ptr;
    }
};
// Posix_FD
//
// Simple file-descriptor holder. The only important purpose is automatically
// closing the fd in the destructor. Setting the fd can happen in the
// constructor or be delayed. A new fd can be set after closing the old one.
// The fd can be retrieved using the .get() method. There are no other methods
// defined, the point here is not to make an abstraction over FDs but just to
// auto-close it.
//
// There is not much more to say. A concern was brought up that it would be
// better to use an existing class. Again, it's important to note that we're
// not trying to add some (probably ill-defined) abstraction. The fact that
// this class stores fds is not hidden and there isn't any I/O functionality
// contained.
//
// Given this, I wasn't sure what existing class to use that does the same
// thing. This Posix_FD class was quick and easy to write and I hope it is easy
// to read too.
//
// Another concern was that we shouldn't use close() directly here, but instead
// use an abstraction (from an existing library) that papers over platform
// differences such that the code can work on e.g. Windows too. (Windows has a
// Posix FS layer as well but the code probably wouldn't work without extra
// work and handling of subtle differences).
//
// I can understand this concern, however BeeGFS can not be easily ported to
// e.g. Windows anyway, and this has never been a declared goal of the project.
// BeeGFS currently can't build on Windows and probably never will.
//
// The usage code currently makes non-trivial use of advanced POSIX and Linux
// functions, such as openat(), fsync(), mmap(), pread(), pwrite(). sendfile()
// was used earlier, and might come back. We rely on Posix file permissions
// too, and on certain semantics like for example O_CREAT | O_EXCL during file
// creation.
//
// I'm not aware of a better API that is more portable while providing the same
// functionality.
//
// Also, papering over platform differences may be harder than it initially
// sounds as soon as good performance and thus good control and good error
// handling is a requirement. To be portable, special handling of platform
// idiosyncracies might be required, and the architecture would have to change
// anyway: away from synchronous function calls which would make the
// abstraction leak into the core code, and towards a more asynchronous model
// that is better decoupled from the core code.
//
// It was proposed that std::ifstream / std::ofstream (or similar standardized
// class) could be used instead. std::ifstream in particular would be a bad fit
// since it is a very generic class that comes with buffering and formatting by
// default. I can't easily see how to replace the calls I listed above using
// std::ifstream. Even if it's possible, the result may be more complicated /
// require use of the underlying Posix FD anyway / be less clear / be more code
// / require to give up some control over syscalls etc. ifstream uses
// exceptions and has facilities such as formatting that aren't needed, but the
// presence of this attached functionality would make the purpose less clear
// IMO.
//
// Posix_FD: simple file-descriptor holder. Its only purpose is to
// automatically close() the fd in the destructor; the fd can be set at
// construction or later, and a new fd can be set after closing the old one.
// See the discussion above for the design rationale.
class Posix_FD
{
    int m_fd = -1; // -1 means "no fd held"
public:
    // Raw fd (-1 if none held).
    __pmq_artificial_method
    int get()
    {
        return m_fd;
    }
    __pmq_artificial_method
    bool valid()
    {
        return m_fd != -1;
    }
    // Close the held fd (if any). Returns close()'s return value, or 0 if
    // nothing was held.
    int close_fd()
    {
        int ret = 0;
        if (m_fd != -1)
        {
            ret = close(m_fd);
            m_fd = -1;
        }
        return ret;
    }
    // Take ownership of fd; must not already hold one.
    __pmq_artificial_method
    void set(int fd)
    {
        assert(m_fd == -1);
        m_fd = fd;
    }
    __pmq_artificial_method
    void operator=(int fd)
    {
        set(fd);
    }
    // Copying is deleted: previously the implicit copy operations were
    // available, so copying made two holders close() the same fd -- a
    // double close that can even hit an unrelated, reused descriptor.
    Posix_FD(Posix_FD const&) = delete;
    Posix_FD& operator=(Posix_FD const&) = delete;
    __pmq_artificial_method
    Posix_FD()
    {
    }
    __pmq_artificial_method
    Posix_FD(int fd)
    {
        set(fd);
    }
    __pmq_artificial_method
    ~Posix_FD()
    {
        close_fd();
    }
};
//
// Libc_DIR
//
// Similar to Posix_FD, but for libc DIR * handles. Same rationale for why I've
// written this applies as for Posix_FD.
//
// This class is currently not used so could be removed.
//
// Libc_DIR: holder for a libc DIR * handle; closedir()s it in the
// destructor. Same rationale as for Posix_FD.
//
// This class is currently not used so could be removed.
class Libc_DIR
{
    DIR *m_dir = nullptr;
public:
    __pmq_artificial_method
    bool valid()
    {
        return m_dir != nullptr;
    }
    __pmq_artificial_method
    DIR *get()
    {
        return m_dir;
    }
    // Take ownership of dir; must not already hold one.
    __pmq_artificial_method
    void set(DIR *dir)
    {
        assert(m_dir == nullptr);
        m_dir = dir;
    }
    // Close the held handle (if any).
    void close_dir()
    {
        if (m_dir)
        {
            closedir(m_dir);
            m_dir = nullptr;
        }
    }
    __pmq_artificial_method
    void operator=(DIR *dir)
    {
        set(dir);
    }
    // Copying is deleted: previously the implicit copy operations were
    // available, so copying made two holders closedir() the same handle.
    Libc_DIR(Libc_DIR const&) = delete;
    Libc_DIR& operator=(Libc_DIR const&) = delete;
    __pmq_artificial_method
    Libc_DIR()
    {
    }
    __pmq_artificial_method
    Libc_DIR(DIR *dir)
    {
        // Route through set() so its not-already-set assertion applies here
        // too (the previous version assigned m_dir directly, bypassing it).
        set(dir);
    }
    __pmq_artificial_method
    ~Libc_DIR()
    {
        close_dir();
    }
};
//
// Mmap_Region
//
// Similar to Posix_FD, but for memory mappings.
//
// On destruction, unmaps the mapped region using munmap().
//
// MMap_Region
//
// Similar to Posix_FD, but for memory mappings. On destruction, unmaps the
// mapped region using munmap().
class MMap_Region
{
    void *m_ptr = MAP_FAILED; // MAP_FAILED means "no mapping held"
    size_t m_length = 0;
public:
    __pmq_artificial_method
    void *get() const
    {
        return m_ptr;
    }
    __pmq_artificial_method
    Untyped_Slice untyped_slice() const
    {
        return Untyped_Slice(m_ptr, m_length);
    }
    __pmq_artificial_method
    bool valid()
    {
        return m_ptr != MAP_FAILED;
    }
    // Unmap the held region (if any).
    void close_mapping()
    {
        if (m_ptr != MAP_FAILED)
        {
            if (munmap(m_ptr, m_length) == -1)
            {
                // should not happen. Simply printing the error for now
                pmq_perr_ef(errno, "WARNING: munmap() failed");
            }
            m_ptr = MAP_FAILED;
            m_length = 0;
        }
    }
    // like mmap but returns whether successful; must not already hold a mapping.
    bool create(void *addr, size_t newlength, int prot, int flags,
        int fd, off_t offset)
    {
        assert(m_ptr == MAP_FAILED);
        void *newptr = mmap(addr, newlength, prot, flags, fd, offset);
        if (newptr == MAP_FAILED)
            return false;
        m_ptr = newptr;
        m_length = newlength;
        return true;
    }
    // Copying is deleted: previously the implicit copy operations were
    // available, so copying made two holders munmap() the same region.
    MMap_Region(MMap_Region const&) = delete;
    MMap_Region& operator=(MMap_Region const&) = delete;
    // Deleting the copies suppresses the implicit default constructor;
    // restore it explicitly.
    MMap_Region() = default;
    __pmq_artificial_method
    ~MMap_Region()
    {
        close_mapping();
    }
};
// Mutex_Protected
//
// Simple wrapper class that protects a data item with a mutex.
// The load() and store() mutex implement thread-synchronized read and write
// access to the data item by locking the resource with a mutex during the
// operation.
//
// A class like Folly::Synchronized might replace this. But again, this was
// very easy to write and is extremely small. Pulling in a large dependency
// just for that might not be justified. Also, having our own class allows
// choosing the mutex type. For example, if we want to profile mutexes using
// the Tracy frame profiler, we need to use Tracy's mutex wrappers (here,
// hidden in the PMQ_PROFILED_MUTEX wrapper). While Folly::Synchronized supports
// custom mutexes, one would need to understand and implement "the extended
// protocol implemented in folly/synchronized/Lock.h".
//
// Upon quick browsing of the 1000 lines in Lock.h, it isn't immediately clear
// what that protocol entails and how much work it would be (if any) to wrap
// our own mutex type (which is potentially a wrap of std::mutex already) to
// conform to that protocol.
//
// Maybe there is something in the C++ standard that is suited as a
// replacement?
//
// Maybe there is, but I consider it much easier to just write 2 methods
// totalling 4 straightforward lines of code...
//
template<typename T>
class Mutex_Protected
{
PMQ_PROFILED_MUTEX(m_mutex);
T m_value;
public:
void store(T value)
{
PMQ_PROFILED_LOCK(lock_, m_mutex);
m_value = value;
}
T load()
{
PMQ_PROFILED_LOCK(lock_, m_mutex);
return m_value;
}
};
/*
* String "slice" that can be passed around. No lifetime semantics or
* unexpected copying etc.
*
* We could use std::string_view instead, but that is a templated type. The
* idea of PMQ_String is to wrap just a char-pointer with a size, and nothing
* more, to have a package that one can ship around. We mostly use strings for
* printf-style formatting and to open files, and we don't need or want any
* more complicated semantics than that.
*/
struct PMQ_String
{
    const char *buffer; // borrowed pointer; no ownership or lifetime semantics
    size_t size;        // length in bytes -- excludes the terminating NUL when one exists (see PMQ_Owned_String::set)
};
/*
* Simple string "holder" class that allocates and frees its buffer. The
* contained string is immutable once constructed. But a new one can be
* "swapped" in by dropping the old string and creating a new one.
*
* Is this a case of NIH when there is std::string? Maybe, but basically the
* same arguments as for Alloc_Slice and the other classes above apply.
*
* std::string
*
* - is somewhat slow to compile
* - Unexpected allocations / copies (and thus exceptions as well) can happen
* very easily, without anyone noticing -- For example, it's as easy as
* writing "auto x = y" instead of "auto& x = y".
* - Apart from exceptions and copies / resizes, appending, there is more
* complexity that we don't need and don't want and that would actually be a
* misfit for our project. Ugly error messages with huge types (...
* std::basic_char ... etc.) is only a small symptom of this.
*/
// Owned, immutable, heap-allocated string (see the rationale above). A new
// value can be swapped in by drop()ping the old string and set()ting a new
// one.
class PMQ_Owned_String
{
    PMQ_String m_string = {};
public:
    // True if a string is currently held.
    bool valid() const
    {
        return m_string.buffer != nullptr;
    }
    // Borrowed view of the held string (buffer is null when empty).
    __pmq_artificial_method
    PMQ_String get() const
    {
        return m_string;
    }
    // Free the held buffer (if any) and reset to the empty state.
    void drop()
    {
        // Checking only for clarity. free() and the rest of the code would work
        // with a null buffer too.
        if (m_string.buffer != nullptr)
        {
            free((void *) m_string.buffer);
            m_string.buffer = nullptr;
            m_string.size = 0;
        }
    }
    // Copy the given NUL-terminated string into a freshly allocated buffer.
    // Must not already hold a string (drop() first).
    void set(const char *buffer)
    {
        assert(! m_string.buffer);
        char *copy = strdup(buffer);
        if (copy == nullptr)
        {
            // is an exception what we want / need?
            throw std::bad_alloc();
        }
        m_string.buffer = copy;
        m_string.size = strlen(buffer);
    }
    // Copying is deleted: previously the implicit copy operations were
    // available, so copying made two owners free() the same buffer.
    PMQ_Owned_String(PMQ_Owned_String const&) = delete;
    PMQ_Owned_String& operator=(PMQ_Owned_String const&) = delete;
    __pmq_artificial_method
    PMQ_Owned_String()
    {
        m_string.buffer = nullptr;
        m_string.size = 0;
    }
    ~PMQ_Owned_String()
    {
        drop();
    }
};
/*
* SNs (sequence numbers)
*
* Sequence numbers, and the ringbuffers that build on them, are a core concept
* of how the PMQ works.
*
* I believe they are pretty much what is elsewhere known as "LMAX Disruptor"
* (google it).
*
* Sequence numbers are 64-bit unsigned integers that can wraparound (but this
* is only theoretical -- wraparound is probably completely untested since
* 64-bit numbers don't overflow easily in practice).
*
* Ringbuffers have a number of slots that is 2^N for some N. SN's are mapped
* to slots with wrap-around in the ringbuffer's 2^N slots by using the lowest
* N bits of the SN to index into the slots array.
*
* The SN templated class provides some type safety -- the Tag type is a
* "phantom tag" (can be implemented by making a new "empty" class) that
* prevents indexing into a ringbuffer using a mismatching sequence number. For
* example, we have a ringbuffer of input-slots that should be indexed by *slot
* sequence numbers* (SSNs). And we have a ringbuffer of chunks that should be
* indexed by *chunk sequence numbers (CSNs). The on-disk chunk store is
* another kind of ringbuffer that works with the same principle of wrapping
* around automatically.
*
* We also track *message sequence numbers* (MSNs) but we don't use them for
* indexing, only for binary search.
*
* Mathematically, SNs form an affine space. This is like a vector space but
* without a designated origin (pls forgive me if what I write here is slightly
* incorrect as far as mathematics is concerned. Only the idea matters). There
* is a 0 value, but it is not meaningfully different compared to any other
* value.
*
* One can subtract two sequence numbers to get a distance (represented as bare
* uint64_t), and one can add a distance to a sequence number to get a new
* sequence number. However, unlike a vector space with designated 0, one can
* not add two sequence numbers meaningfully (SN<T> has operator+(uint64_t d)
* but no operator+(SN<T>& other).
*/
template<typename Tag>
class SN
{
    uint64_t m_value;
public:
    // Construct from a raw sequence value. explicit to avoid accidental
    // conversions from bare integers (the whole point of the phantom Tag).
    explicit SN(uint64_t value)
    {
        m_value = value;
    }
    // Some C++ trivia following. In most cases you can ignore this and just use
    // the class similar to primitive integers.
    //
    // Here we specify an *explicitly-defaulted default-constructor*. This will
    // allow us to initialize the object with undefined (garbage) value if we
    // want so.
    //
    // Explanation: Since we have explicitly specified the constructor with 1
    // argument already, there wouldn't be an implicit default constructor (a
    // constructor with no arguments). To get a default constructor, we need to
    // explicitly specify one. We need a default constructor (no constructor
    // arguments) if we want to write
    //
    //    SN sn;
    //
    // For simple data types (like SN), we typically want the above line to
    // leave the object's members uninitialized (garbage values). While this is
    // in some ways dangerous, it can be simpler especially for objects where
    // zero-initialization isn't very convenient or meaningful. Leaving values
    // uninitialized in the default constructor also allows the compiler to
    // catch bugs in some situations when the user unintentionally forgot to
    // specify an explicit value.
    //
    // Note a gotcha: There is a difference between an empty default constructor
    //
    //    SN() {}
    //
    // and an (explicitly or implicitly) defaulted default constructor:
    //
    //    SN() = default;
    //
    // If we use the class like this:
    //
    //    SN x {};
    //    SN y = SN();  // or like this
    //    SN z = {};  // or like this...
    //
    // then x will contain garbage with the empty default constructor, but will
    // be zero-initialized with the (explicitly-) defaulted default constructor.
    // We'd typically want zero initialization with this syntax.
    SN() = default;
    // Raw sequence value (e.g. for ringbuffer slot masking or printing).
    __pmq_artificial_method
    uint64_t value() const
    {
        return m_value;
    }
    __pmq_artificial_method
    void operator++()
    {
        m_value++;
    }
    __pmq_artificial_method
    void operator++(int)
    {
        m_value++;
    }
    // BUGFIX/consistency: previously returned SN by value while operator-=
    // returned SN&. Both compound assignments now return a reference,
    // following the usual C++ convention and avoiding a needless copy.
    __pmq_artificial_method
    SN& operator+=(uint64_t d)
    {
        m_value += d;
        return *this;
    }
    __pmq_artificial_method
    SN& operator-=(uint64_t d)
    {
        m_value -= d;
        return *this;
    }
    // SN + distance = SN. (There is deliberately no SN + SN; see the affine
    // space discussion above.)
    __pmq_artificial_method
    SN operator+(uint64_t d) const
    {
        return SN(m_value + d);
    }
    __pmq_artificial_method
    SN operator-(uint64_t d) const
    {
        return SN(m_value - d);
    }
    // SN - SN = distance (bare uint64_t, wraps modulo 2^64).
    __pmq_artificial_method
    uint64_t operator-(SN other) const
    {
        return m_value - other.m_value;
    }
    __pmq_artificial_method
    bool operator==(SN other) const
    {
        return m_value == other.m_value;
    }
    __pmq_artificial_method
    bool operator!=(SN other) const
    {
        return m_value != other.m_value;
    }
};
/*
* COMPARING SEQUENCE NUMBERS
* ==========================
*
* Since sequence numbers wrap around (in theory, when 64 bits overflow) they
* have no natural ordering.
*
* However, in practice, sequence numbers are used to index in much smaller
* buffer, and at any given time there is only a small window of sequence
* numbers. It's a sliding window, but a window still.
*
* So, admitting that the sequence numbers in a given window may wraparound,
* back to 0, we can still assume that they never "overtake" each other.
* We can subtract two numbers using unsigned arithmetic and determine their
* relative ordering from the result. Centering our worldview at a number x, we
* divide the space of uint64_t numbers into those that are less than x (x -
* 2^63 to x) and those that are greater than x (x to 2^63).
*
* Note that this relation is not transitive (x <= y && y <= z does not imply x
* <= z), and not antisymmetric -- (x + 2^63) is both greater and less than x.
* So it's not a true ordering relation, but in practice we can use it to
* reliably compare items by "age".
*
 * The value 1 should be considered greater than UINT64_MAX, since 1 -
 * UINT64_MAX == 2 (a small distance). Conversely, UINT64_MAX is less than 1,
 * since UINT64_MAX - 1 equals (UINT64_MAX - 1), which is a huge distance,
 * larger than UINT64_MAX / 2.
*
*/
// Comparing bare uint64_t sequence values.
// True if a is strictly "older" than b: the forward distance from a to b is
// in [1, 2^63]. Note b - (a + 1) == b - a - 1 in modular arithmetic.
__pmq_artificial_func
bool _sn64_lt(uint64_t a, uint64_t b)
{
    return b - a - 1 <= UINT64_MAX / 2;
}
// True if a is "older than or equal to" b: the forward distance from a to b
// is in [0, 2^63 - 1].
__pmq_artificial_func
bool _sn64_le(uint64_t a, uint64_t b)
{
    return b - a <= UINT64_MAX / 2;
}
// True if a is "newer than or equal to" b: the forward distance from b to a
// is in [0, 2^63 - 1]. Mirror image of _sn64_le().
__pmq_artificial_func
bool _sn64_ge(uint64_t a, uint64_t b)
{
    return a - b <= UINT64_MAX / 2;
}
// True if a is strictly "newer" than b: the forward distance from b to a is
// in [1, 2^63]. Note a - (b + 1) == a - b - 1 in modular arithmetic.
__pmq_artificial_func
bool _sn64_gt(uint64_t a, uint64_t b)
{
    return a - b - 1 <= UINT64_MAX / 2;
}
// Comparing type-safe "tagged" SN values
// Type-safe variant of _sn64_lt(): a strictly "older" than b.
template<typename Tag>
__pmq_artificial_func
bool sn64_lt(SN<Tag> a, SN<Tag> b)
{
    return b - (a + 1) <= UINT64_MAX / 2;
}
// Type-safe variant of _sn64_le(): a "older than or equal to" b.
template<typename Tag>
__pmq_artificial_func
bool sn64_le(SN<Tag> a, SN<Tag> b)
{
    return b - a <= UINT64_MAX / 2;
}
// Type-safe variant of _sn64_ge(): a "newer than or equal to" b.
template<typename Tag>
__pmq_artificial_func
bool sn64_ge(SN<Tag> a, SN<Tag> b)
{
    return a - b <= UINT64_MAX / 2;
}
// Type-safe variant of _sn64_gt(): a strictly "newer" than b.
template<typename Tag>
__pmq_artificial_func
bool sn64_gt(SN<Tag> a, SN<Tag> b)
{
    return a - (b + 1) <= UINT64_MAX / 2;
}
// True if sn lies within the closed window [lo, hi], comparing forward
// distances from lo. Assumes lo..hi describes a valid (wrap-aware) window,
// i.e. hi is "ahead of" lo.
template<typename Tag>
__pmq_artificial_func
bool sn64_inrange(SN<Tag> sn, SN<Tag> lo, SN<Tag> hi)
{
    return sn - lo <= hi - lo;
}
// Ringbuffer containing a buffer (power-of-2 size) of elements of type V. It
// can be "indexed" using SN's of matching Tag type; the SN is mapped to a
// slot by masking with (slot_count - 1).
template<typename Tag, typename V>
class Ringbuffer
{
    using K = SN<Tag>;
    V *m_ptr = nullptr;
    size_t m_count = 0;
public:
    __pmq_artificial_method
    uint64_t slot_count() const
    {
        return m_count;
    }
    // Point the ringbuffer at the given backing storage. The slot count must
    // be a power of 2 since indexing masks with (m_count - 1).
    __pmq_artificial_method
    void reset(Slice<V> slice)
    {
        assert(pmq_is_power_of_2(slice.count()));
        m_ptr = slice.data();
        m_count = slice.count();
    }
    __pmq_artificial_method
    Slice<V> as_slice() const
    {
        return Slice<V>(m_ptr, m_count);
    }
    // Map a sequence number to its slot using the low log2(m_count) bits.
    __pmq_artificial_method
    const V *get_slot_for(K k) const
    {
        return &m_ptr[k.value() & (m_count - 1)];
    }
    __pmq_artificial_method
    V *get_slot_for(K k)
    {
        return &m_ptr[k.value() & (m_count - 1)];
    }
    __pmq_artificial_method
    Ringbuffer()
    {
    }
    __pmq_artificial_method
    Ringbuffer(V *ptr, uint64_t size)
    {
        // BUGFIX: reset() takes a Slice<V>. The old code called
        // reset(ptr, size), which matches no overload and failed to compile
        // as soon as this constructor was instantiated.
        reset(Slice<V>(ptr, size));
    }
};

View File

@@ -0,0 +1,241 @@
#include "pmq_logging.hpp"
#include "pmq_common.hpp"
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstring>
// The logging module can either print to stderr or use the BeeGFS metadata
// server's logging backend.
#ifdef PMQ_TEST
# ifndef PMQ_LOG_LEVEL
# error PMQ_LOG_LEVEL must be defined when compiling test case
# endif
#define INTEGRATE_WITH_METADATA_SERVER 0
#else
#define INTEGRATE_WITH_METADATA_SERVER 1
#endif
#if INTEGRATE_WITH_METADATA_SERVER
// Integrate into metadata server
#include <common/app/log/Logger.h>
#endif
// Bounded ring buffer of Log_Message used to hand messages from log
// producers to a consumer. writepos/readpos are free-running counters; the
// slot index is (pos & (capacity - 1)), so capacity must be a power of 2.
struct Log_Buffer
{
    PMQ_PROFILED_MUTEX(mutex);  // guards all fields below
    PMQ_PROFILED_CONDVAR(writeable); // reader => writer: a slot became free
    PMQ_PROFILED_CONDVAR(readable); // writer => reader: a message became available
    Alloc_Slice<Log_Message> msgs;  // backing storage, 'capacity' slots
    size_t capacity = 0;  // number of slots; must be a power of 2 (index masking)
    size_t writepos = 0;  // total messages written so far (not wrapped)
    size_t readpos = 0;   // total messages read so far (not wrapped)
    Log_Buffer()
    {
        // TODO: this costs a lot of memory. However, a previous setting of 64
        // wasn't enough for high-frequency logging. We need more dynamic and
        // judicious memory allocation.
        capacity = 1024;
        msgs.allocate(capacity);
    }
};
// Copy *input into the ring, blocking while the buffer is full.
static void log_buffer_write(Log_Buffer *logbuf, Log_Message const *input)
{
    PMQ_PROFILED_UNIQUE_LOCK(lock, logbuf->mutex);
    // Full when the write counter is a whole capacity ahead of the reader.
    while (logbuf->writepos - logbuf->readpos == logbuf->capacity)
        logbuf->writeable.wait(lock);
    Log_Message *dst = &logbuf->msgs[logbuf->writepos & (logbuf->capacity - 1)];
    dst->size = input->size;
    memcpy(dst->data, input->data, input->size);
    logbuf->writepos += 1;
    // hoping that this is cheap: otherwise we should track the number of
    // readers and check it before calling notify_one()
    logbuf->readable.notify_one();
}
// Internal: copy the oldest message out of the ring and advance readpos.
// Caller must hold logbuf->mutex and must have checked the buffer is
// non-empty.
static void _log_buffer_read(Log_Buffer *logbuf, Log_Message *output)
{
    Log_Message *src = &logbuf->msgs[logbuf->readpos & (logbuf->capacity - 1)];
    output->size = src->size;
    memcpy(output->data, src->data, src->size);
    logbuf->readpos += 1;
    // hoping that this is cheap: otherwise we should track the number of
    // writers and check it before calling notify_one()
    logbuf->writeable.notify_one();
}
// Blocking read: wait until a message is available, then copy it out.
static void log_buffer_read(Log_Buffer *logbuf, Log_Message *output)
{
    PMQ_PROFILED_UNIQUE_LOCK(lock, logbuf->mutex);
    for (;;)
    {
        if (logbuf->writepos != logbuf->readpos)
            break;
        logbuf->readable.wait(lock);
    }
    _log_buffer_read(logbuf, output);
}
// Non-blocking read: returns false immediately if the buffer is empty.
static bool log_buffer_try_read(Log_Buffer *logbuf, Log_Message *output)
{
    PMQ_PROFILED_LOCK(lock, logbuf->mutex);
    const bool have_msg = (logbuf->writepos != logbuf->readpos);
    if (have_msg)
        _log_buffer_read(logbuf, output);
    return have_msg;
}
// Read with timeout: wait up to 'millis' milliseconds for a message.
// Returns false if the deadline passes while the buffer is still empty.
static bool log_buffer_try_read_timeout_millis(Log_Buffer *logbuf, Log_Message *output, int millis)
{
    const auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(millis);
    PMQ_PROFILED_UNIQUE_LOCK(lock, logbuf->mutex);
    while (logbuf->readpos == logbuf->writepos)
    {
        const auto status = logbuf->readable.wait_until(lock, deadline);
        if (status == std::cv_status::timeout)
            return false;
    }
    _log_buffer_read(logbuf, output);
    return true;
}
static Log_Buffer global_log_buffer;
// Enqueue a message into the global log buffer; blocks while it is full.
void pmq_write_log_message(Log_Message const *input)
{
    log_buffer_write(&global_log_buffer, input);
}
// Dequeue a message from the global log buffer; blocks while it is empty.
void pmq_read_log_message(Log_Message *output)
{
    log_buffer_read(&global_log_buffer, output);
}
// Non-blocking dequeue; returns false if no message is available.
bool pmq_try_read_log_message(Log_Message *output)
{
    return log_buffer_try_read(&global_log_buffer, output);
}
// Dequeue with timeout; returns false if no message arrived within 'millis'.
bool pmq_try_read_log_message_timeout_millis(Log_Message *output, int millis)
{
    return log_buffer_try_read_timeout_millis(&global_log_buffer, output, millis);
}
// Append printf-formatted text at msg->data + msg->size, truncating if it
// does not fit, and keep the buffer NUL-terminated. vsnprintf() is given one
// byte less than the remaining space so that the final terminator store
// below stays in bounds even after the clamp.
void log_msg_printfv(Log_Message *msg, const char *fmt, va_list ap)
{
    int ret = vsnprintf(msg->data + msg->size, sizeof msg->data - 1 - msg->size, fmt, ap);
    assert(ret >= 0);
    // vsnprintf() returns the would-be length, which can exceed the space
    // that was actually available -- clamp size back to the buffer bounds.
    msg->size += (size_t) ret;
    if (msg->size > sizeof msg->data - 1)
        msg->size = sizeof msg->data - 1;
    msg->data[msg->size] = 0;
}
// printf-style wrapper around log_msg_printfv(). We use __pmq_formatter(2, 3)
// because the format string is parameter 2 and the variadic arguments start
// at parameter 3 (msg is parameter 1).
void __pmq_formatter(2, 3) log_msg_printf(Log_Message *msg, const char *fmt, ...) // NOLINT this is safe because of __pmq_formatter() annotation
{
    va_list ap;
    va_start(ap, fmt);
    log_msg_printfv(msg, fmt, ap);
    va_end(ap);
}
/* Core implementation behind all pmq logging macros. Formats the message
 * (prefixing a level string in stand-alone builds), optionally appends the
 * system error text for opt.errnum (PMQ_MSG_OPT_ERRNO), then hands the
 * result to the metadata server logger or to the internal log buffer.
 */
void pmq_msg_ofv(const PMQ_Msg_Options& opt, const char *fmt, va_list ap)
{
    bool print_errno = (bool) (opt.flags & PMQ_MSG_OPT_ERRNO);
    uint32_t priority = opt.flags & PMQ_MSG_OPT_LVL_MASK;
    Log_Message log_msg;
    log_msg.size = 0;
#if INTEGRATE_WITH_METADATA_SERVER
    // Map the PMQ level bits to the metadata server's log levels.
    int metadata_priority = 0;
    switch (priority)
    {
        case PMQ_MSG_OPT_LVL_DEBUG: metadata_priority = Log_DEBUG; break;
        case PMQ_MSG_OPT_LVL_INFO: metadata_priority = Log_NOTICE; break;
        case PMQ_MSG_OPT_LVL_WARN: metadata_priority = Log_WARNING; break;
        case PMQ_MSG_OPT_LVL_ERR: metadata_priority = Log_ERR; break;
        default: assert(0); // can't happen at least currently where log mask has 2 bits.
    }
#else
    // Early return, avoiding most of the work if the message has less priority
    // than the log level.
    // TODO: we should have something like this for metadata server integration
    // too.
    if (PMQ_LOG_LEVEL > priority)
        return;
    switch (priority)
    {
        case PMQ_MSG_OPT_LVL_DEBUG: log_msg_printf(&log_msg, "DEBUG: "); break;
        case PMQ_MSG_OPT_LVL_INFO: log_msg_printf(&log_msg, "INFO: "); break;
        case PMQ_MSG_OPT_LVL_WARN: log_msg_printf(&log_msg, "WARNING: "); break;
        case PMQ_MSG_OPT_LVL_ERR: log_msg_printf(&log_msg, "ERROR: "); break;
        default: assert(0); // can't happen at least currently where log mask has 2 bits.
    }
#endif
    log_msg_printfv(&log_msg, fmt, ap);
    if (print_errno)
    {
        char errbuf[64];
        // BUGFIX: must be initialized. In the XSI strerror_r() branch below,
        // errstr was left uninitialized when strerror_r() failed, and the
        // "if (! errstr)" fallback then read an indeterminate value (UB).
        const char *errstr = nullptr;
#if (_POSIX_C_SOURCE >= 200112L) && ! _GNU_SOURCE
        {
            // XSI compliant strerror_r()
            int ret = strerror_r(opt.errnum, errbuf, sizeof errbuf);
            if (ret == 0)
                errstr = errbuf;
        }
#else
        {
            // GNU version of strerror_r()
            errstr = strerror_r(opt.errnum, errbuf, sizeof errbuf);
        }
#endif
        if (! errstr)
        {
            // Fallback: no error string available, print the raw code.
            snprintf(errbuf, sizeof errbuf, "(errno=%d)", opt.errnum);
            errstr = errbuf;
        }
        log_msg_printf(&log_msg, ": %s", errstr);
    }
#if INTEGRATE_WITH_METADATA_SERVER
    // Integration into metadata server
    Logger *logger = Logger::getLogger();
    logger->log(LogTopic_EVENTLOGGER, metadata_priority, opt.loc.file, opt.loc.line, log_msg.data);
#else
    log_msg_printf(&log_msg, "\n");
    //fwrite(log_msg.data, log_msg.size, 1, stderr);
    pmq_write_log_message(&log_msg);
#endif
}
// Var-args entry point; forwards to pmq_msg_ofv(). __pmq_formatter(2, 3):
// the format string is parameter 2, variadic arguments start at parameter 3.
void __pmq_formatter(2, 3) pmq_msg_of(const PMQ_Msg_Options& opt, const char *fmt, ...) // NOLINT this is safe because of use of __pmq_formatter() annotation
{
    va_list ap;
    va_start(ap, fmt);
    pmq_msg_ofv(opt, fmt, ap);
    va_end(ap);
}

View File

@@ -0,0 +1,112 @@
#pragma once
#include "pmq_base.hpp"
// Flag bits for PMQ_Msg_Options::flags.
enum
{
    PMQ_MSG_OPT_DEFAULT = 0,
    PMQ_MSG_OPT_ERRNO = (1 << 0),       // append strerror() text for PMQ_Msg_Options::errnum
    PMQ_MSG_HAS_SOURCE_LOC = (1 << 1),  // presumably marks loc as populated -- TODO confirm usage
    PMQ_MSG_OPT_LVL_MASK = (3 << 2), // bits 2 and 3 (0-based) hold the log level.
    PMQ_MSG_OPT_LVL_DEBUG = (0 << 2),
    PMQ_MSG_OPT_LVL_INFO = (1 << 2),
    PMQ_MSG_OPT_LVL_WARN = (2 << 2),
    PMQ_MSG_OPT_LVL_ERR = (3 << 2),
};
// Source-code location (__FILE__ / __LINE__) captured at the log call site.
struct PMQ_Source_Loc
{
    const char *file;  // filename string; must outlive the message (string literal)
    uint32_t line;
};
// Bundled options for the pmq_msg_of*() logging entry points.
struct PMQ_Msg_Options
{
    PMQ_Source_Loc loc;  // call site; filled in by the PMQ_MSG_OPTIONS() macro
    uint32_t flags; // PMQ_MSG_OPT_* bit flags, including the level bits
    int errnum;     // system error code; consulted when PMQ_MSG_OPT_ERRNO is set
};
// Logging functions / macros.
//
// The following functions are typically used in the client code.
//
// pmq_msg_f(fmt, ...): Submit a log message with default log level and format string + var-args
// pmq_perr_f(fmt, ...): Submit a log message with error log level and format string + var-args
// pmq_perr_ef(errno, fmt, ...): like pmq_perr_f() but also add text for given system error code ("errno")
//
// To explain all functions available here: they are made according to a pattern
//
// pmq_{LVL}_{MNEMONICS}
//
// LVL: logging level, possible options
// - msg: Default level or use specified level ('l' mnemnonic)
// - debug: Debug level
// - warn: Warning level
// - perr: Error level ("print-error")
//
// MNEMONICS: combination of 1-letter chars
// - l: means a logging level is specified (only available with 'msg' category)
// - e: add a text for specified system error code ("errno")
// - f: "format", like in the stdio function printf().
// - v: in combination with f (so 'fv'), means the arguments come as a va_list, like in stdio function vfprintf().
// - o: Use a single options struct holding level, errno explicitly, as well as source code location info.
void pmq_msg_ofv(const PMQ_Msg_Options& opt, const char *fmt, va_list ap);
void __pmq_formatter(2, 3) pmq_msg_of(const PMQ_Msg_Options& opt, const char *fmt, ...);
// Capture the current source location (compound-literal style cast).
#define PMQ_SOURCE_LOC ((PMQ_Source_Loc) { __FILE__, __LINE__ })
// Build a PMQ_Msg_Options; relies on member order: loc, flags, errnum.
#define PMQ_MSG_OPTIONS(...) (PMQ_Msg_Options { PMQ_SOURCE_LOC, ##__VA_ARGS__ })
// Generic forms: explicit level ('l'), optionally with errno ('e').
#define pmq_msg_lf(lvl, fmt, ...) \
    pmq_msg_of(PMQ_MSG_OPTIONS((lvl), 0), fmt, ##__VA_ARGS__)
#define pmq_msg_lef(lvl, e, fmt, ...) \
    pmq_msg_of(PMQ_MSG_OPTIONS(PMQ_MSG_OPT_ERRNO | (lvl), e), fmt, ##__VA_ARGS__)
// INFO level
#define pmq_msg_f(fmt, ...) \
    pmq_msg_lf(PMQ_MSG_OPT_LVL_INFO, fmt, ##__VA_ARGS__)
#define pmq_msg_ef(e, fmt, ...) \
    pmq_msg_lef(PMQ_MSG_OPT_LVL_INFO, (e), fmt, ##__VA_ARGS__)
// DEBUG level
#define pmq_debug_f(fmt, ...) \
    pmq_msg_lf(PMQ_MSG_OPT_LVL_DEBUG, fmt, ##__VA_ARGS__)
#define pmq_debug_ef(e, fmt, ...) \
    pmq_msg_lef(PMQ_MSG_OPT_LVL_DEBUG, (e), fmt, ##__VA_ARGS__)
// WARNING level
#define pmq_warn_f(fmt, ...) \
    pmq_msg_lf(PMQ_MSG_OPT_LVL_WARN, fmt, ##__VA_ARGS__)
#define pmq_warn_ef(e, fmt, ...) \
    pmq_msg_lef(PMQ_MSG_OPT_LVL_WARN, (e), fmt, ##__VA_ARGS__)
// ERROR level
#define pmq_perr_f(fmt, ...) \
    pmq_msg_lf(PMQ_MSG_OPT_LVL_ERR, fmt, ##__VA_ARGS__)
#define pmq_perr_ef(e, fmt, ...) \
    pmq_msg_lef(PMQ_MSG_OPT_LVL_ERR, (e), fmt, ##__VA_ARGS__)
// Low-level Logging I/O interface
// A single log message: size-prefixed, NUL-terminated text. The data array
// is sized so the whole struct occupies 256 bytes.
struct Log_Message
{
    size_t size;  // number of valid bytes in data, excluding the NUL terminator
    char data[256 - sizeof (size_t)]; // for simplicity
};
void pmq_write_log_message(Log_Message const *input);
void pmq_read_log_message(Log_Message *output);
bool pmq_try_read_log_message_timeout_millis(Log_Message *output, int millis);
bool pmq_try_read_log_message(Log_Message *output);

View File

@@ -0,0 +1,212 @@
#pragma once
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "pmq_logging.hpp"
/* Wrapper around open() to open directories to put this awkward code in a
* central place. It is counter-intuitive but this is apparently how you're
* supposed to open directories on Unix, both the reading and "writing" (i.e.
* create, unlink, rename).
* The O_DIRECTORY flag is optional but the O_RDONLY is not; opening with
* O_RDWR | O_DIRECTORY fails with "is a directory" (weird!).
*
* Returns: fd to open directory or -1, in which case the errno variable must
* be handled as usual.
*/
// See the explanatory comment above: directories must be opened O_RDONLY
// even when used for "write-like" *at() operations (create, unlink, rename);
// O_RDWR fails on a directory. Returns fd >= 0 or -1 with errno set.
static inline int pmq_open_dir(const char *path)
{
    return open(path, O_RDONLY | O_DIRECTORY);
}
/* Verify that the already-open fd refers to a regular file. what_file is the
 * file's name, used for the log message only.
 * Returns true if fd is a regular file; false on fstat() failure or if fd
 * refers to some other file type.
 * (Previously declared as returning int while always returning true/false;
 * bool now states the contract explicitly. Callers only test truthiness,
 * so this is backward-compatible.)
 */
static inline bool pmq_check_regular_file(int fd, const char *what_file)
{
    struct stat st;
    if (fstat(fd, &st) == -1)
    {
        pmq_perr_ef(errno, "Failed to fstat() the fd we opened");
        return false;
    }
    if (! S_ISREG(st.st_mode))
    {
        pmq_perr_f("We opened the file '%s' expecting a regular file but it's not",
                what_file);
        return false;
    }
    return true;
}
// Open an existing regular file relative to basedir_fd.
// Note: returns an fd >= 0 if successful.
// On failure, -1 is returned and
// - if the file failed to open, errno indicates why the file failed to open.
// - if the file was opened successfully but then closed again because it was not
//   a regular file, errno is set to 0.
static inline int pmq_openat_regular_existing(
    int basedir_fd, const char *relpath, int flags)
{
    // only access mode may be specified -- no other flags
    // In particular, O_CREAT would break the logic.
    assert(flags == O_RDWR || flags == O_RDONLY || flags == O_WRONLY);
    int fd = openat(basedir_fd, relpath, flags, 0);
    if (fd == -1)
    {
        // Logging may clobber errno; save and restore it so the caller sees
        // the original openat() error.
        int e = errno;
        pmq_perr_ef(errno, "Failed to openat() existing file='%s', flags=%x",
                relpath, flags);
        errno = e;
        return fd;
    }
    /* The case where fd refers to something other than a regular file _may_
     * have been caught by the kernel already above. For example, opening a
     * directory using O_RDWR will fail. On the other hand, opening a directory
     * using O_RDONLY will succeed.
     * In any case, doing an explicit check here.
     */
    if (! pmq_check_regular_file(fd, relpath))
    {
        close(fd);
        errno = 0;
        return -1;
    }
    return fd;
}
// Create a new regular file relative to basedir_fd (O_CREAT | O_EXCL are
// added, so an already-existing file is an error).
// Returns fd >= 0 on success, -1 with errno set on failure.
static inline int pmq_openat_regular_create(
    int basedir_fd, const char *relpath, int flags, mode_t mode)
{
    // only access mode may be specified -- no other flags
    assert(flags == O_RDWR || flags == O_RDONLY || flags == O_WRONLY);
    // But this func makes sure that the creation-flags are specified
    flags |= O_CREAT | O_EXCL;
    int fd = openat(basedir_fd, relpath, flags, mode);
    if (fd == -1)
    {
        // Logging may clobber errno; save and restore for the caller.
        int e = errno;
        pmq_perr_ef(errno, "Failed to openat() file='%s', flags=%x, mode=%o",
                relpath, flags, (unsigned) mode);
        errno = e;
        return fd;
    }
    // all necessarily error handling should be done by the OS. (note O_EXCL)
    return fd;
}
// Assert that an I/O size round-trips through ssize_t, i.e. does not exceed
// SSIZE_MAX.
__pmq_artificial_func
void assert_sane_size(size_t size)
{
    // check that size is representable as a ssize_t too.
    // It is implementation defined how syscalls like write() handle write I/O sizes larger than SSIZE_T.
    // So better don't even try to.
    assert((size_t) (ssize_t) size == size);
}
/* Write the whole slice to fd, looping over partial writes. 'what' names the
 * destination for the error log.
 * Returns true on success; on error, logs, preserves errno, returns false.
 */
static inline bool pmq_write_all(int fd, Untyped_Slice slice, const char *what)
{
    assert_sane_size(slice.size());
    while (slice.size() != 0)
    {
        const ssize_t res = write(fd, slice.data(), slice.size());
        if (res < 0)
        {
            // Logging may clobber errno; preserve it for the caller.
            const int saved_errno = errno;
            pmq_perr_ef(errno, "Failed to write %zu bytes to %s",
                    slice.size(), what);
            errno = saved_errno;
            return false;
        }
        slice = slice.offset_bytes((size_t) res);
    }
    return true;
}
/* Positional variant of pmq_write_all(): write the whole slice at the given
 * file offset, looping over partial writes and advancing the offset.
 * Returns true on success; on error, logs, preserves errno, returns false.
 */
static inline bool pmq_pwrite_all(int fd, Untyped_Slice slice, off_t offset, const char *what)
{
    assert_sane_size(slice.size());
    while (slice.size() != 0)
    {
        const ssize_t res = pwrite(fd, slice.data(), slice.size(), offset);
        if (res < 0)
        {
            // Logging may clobber errno; preserve it for the caller.
            const int saved_errno = errno;
            pmq_perr_ef(errno, "Failed to pwrite() %zu bytes at offset %jd to %s",
                    slice.size(), (intmax_t) offset, what);
            errno = saved_errno;
            return false;
        }
        slice = slice.offset_bytes((size_t) res);
        offset += (size_t) res;
    }
    return true;
}
static inline bool pmq_read_all(int fd, Untyped_Slice slice, const char *what)
{
assert_sane_size(slice.size());
while (slice.size())
{
ssize_t nw = read(fd, slice.data(), slice.size());
if (nw == -1)
{
int e = errno;
pmq_perr_ef(errno, "Failed to read %zu bytes from %s",
slice.size(), what);
errno = e;
return false;
}
slice = slice.offset_bytes((size_t) nw);
}
return true;
}
static inline bool pmq_pread_all(int fd, Untyped_Slice slice, off_t offset, const char *what)
{
assert_sane_size(slice.size());
while (slice.size())
{
ssize_t nw = pread(fd, slice.data(), slice.size(), offset);
if (nw == -1)
{
int e = errno;
pmq_perr_ef(errno, "Failed to pread() %zu bytes at offset %jd to %s",
slice.size(), (intmax_t) offset, what);
errno = e;
return false;
}
slice = slice.offset_bytes((size_t) nw);
offset += (size_t) nw;
}
return true;
}

View File

@@ -0,0 +1,46 @@
#pragma once
#include <unistd.h>
#include <sys/syscall.h>
#include <mutex>
#include <condition_variable>
#if PMQ_WITH_PROFILING
// NOTE: this requires building with matching headers in the include-path for
// tracy. Tracy is a "frame profiler": https://github.com/wolfpld/tracy
// If PMQ is built as part of a bigger project (e.g. BeeGFS metadata server)
// building with the proper settings may not be supported (yet).
// A build setup that supports tracy currently exists as part of the flex-docs
// repository (ask the BeeGFS team).
# include <Tracy.hpp>
# define PMQ_PROFILING_CTX FrameMark
# define PMQ_PROFILED_SCOPE(name) ZoneScopedN(name)
# define PMQ_PROFILED_FUNCTION ZoneScoped
# define PMQ_PROFILED_MUTEX(name) TracyLockable(std::mutex, name)
# define PMQ_PROFILED_CONDVAR(name) std::condition_variable_any name
# define PMQ_PROFILED_LOCK(name, themutex) \
    auto& __ref__##name(themutex); \
    std::lock_guard<LockableBase(std::mutex)> name(__ref__##name); \
    LockMark(__ref__##name)
// BUGFIX: this definition used to end with a stray trailing backslash after
// LockMark(...). Backslash-newline splicing happens before preprocessing
// directives are parsed, so the following "#else" line was absorbed into the
// macro definition, unbalancing the #if/#else/#endif block (with profiling
// disabled, the fallback definitions below were then never seen).
# define PMQ_PROFILED_UNIQUE_LOCK(name, themutex) \
    auto& __ref__##name(themutex); \
    std::unique_lock<LockableBase(std::mutex)> name(__ref__##name); \
    LockMark(__ref__##name)
#else
// Without profiling these degrade to plain std::mutex /
// std::condition_variable and the standard lock guards.
# define PMQ_PROFILING_CTX
# define PMQ_PROFILED_SCOPE(name)
# define PMQ_PROFILED_FUNCTION
# define PMQ_PROFILED_MUTEX(name) std::mutex name
# define PMQ_PROFILED_CONDVAR(name) std::condition_variable name
# define PMQ_PROFILED_LOCK(name, themutex) \
    std::lock_guard<std::mutex> name(themutex)
# define PMQ_PROFILED_UNIQUE_LOCK(name, themutex) \
    std::unique_lock<std::mutex> name(themutex)
#endif