New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,200 @@
#pragma once
#include <common/toolkit/serialization/Serialization.h>
#include <common/Common.h>
#include <net/sock/ibvsocket/OpenTk_IBVSocket.h>
#include <arpa/inet.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <netdb.h>
#include <queue>
#include <infiniband/verbs.h>
#include <rdma/rdma_cma.h>
#ifdef BEEGFS_NVFS
#include <common/threading/Mutex.h>
#include <unordered_map>
#endif /* BEEGFS_NVFS */
// Work id constants. Each value is defined as 1 + the previous one, so they expand to
// recv offset = 1, send offset = 2, write id = 3, read id = 4.
// NOTE(review): presumably used as (base values for) ibverbs work request ids - confirm
// against the implementation.
#define IBVSOCKET_RECV_WORK_ID_OFFSET (1)
#define IBVSOCKET_SEND_WORK_ID_OFFSET (1 + IBVSOCKET_RECV_WORK_ID_OFFSET)
#define IBVSOCKET_WRITE_WORK_ID (1 + IBVSOCKET_SEND_WORK_ID_OFFSET)
#define IBVSOCKET_READ_WORK_ID (1 + IBVSOCKET_WRITE_WORK_ID)
// NOTE(review): presumably the number of completion channel events gathered before they
// are acknowledged - confirm against the implementation.
#define IBVSOCKET_EVENTS_GATHER_NUM (64)
// Verification string, its length and the protocol version; the names suggest they are
// placed in the connection private data (IBVCommDest carries matching fields).
#define IBVSOCKET_PRIVATEDATA_STR "fhgfs0 " // must be exactly(!!) 8 bytes long
#define IBVSOCKET_PRIVATEDATA_STR_LEN 8
#define IBVSOCKET_PRIVATEDATA_PROTOCOL_VER 1
// Enforce the "exactly 8 bytes" rule at compile time. sizeof() of a string literal counts
// the terminating NUL, so 7 visible characters + NUL == 8.
static_assert(sizeof(IBVSOCKET_PRIVATEDATA_STR) == IBVSOCKET_PRIVATEDATA_STR_LEN,
   "IBVSOCKET_PRIVATEDATA_STR must occupy exactly IBVSOCKET_PRIVATEDATA_STR_LEN bytes");
// Forward declarations of the internal structs, each paired with a typedef so the type can
// be named without the "struct" keyword. The full definitions follow further down.
struct IBVIncompleteRecv;
typedef struct IBVIncompleteRecv IBVIncompleteRecv;
struct IBVIncompleteSend;
typedef struct IBVIncompleteSend IBVIncompleteSend;
struct IBVCommContext;
typedef struct IBVCommContext IBVCommContext;
struct IBVCommDest;
typedef struct IBVCommDest IBVCommDest;
// FIFO of rdma_cm_event pointers (used for IBVSocket::delayedCmEventsQ).
typedef std::queue<struct rdma_cm_event*> CmEventQueue;
#ifdef BEEGFS_NVFS
// Maps a buffer address to its ibv_mr. Note: with a char* key the default std::hash and
// equality operate on the pointer value, not on string contents.
typedef std::unordered_map<char *, struct ibv_mr *> MRMap;
// Maps a 64-bit key to an int.
// NOTE(review): presumably work request id -> completion status/count - confirm.
typedef std::unordered_map<uint64_t, int> CQMap;
#endif /* BEEGFS_NVFS */
// Socket setup from an existing rdma_cm_id and comm context.
// NOTE(review): the definitions are not part of this header; the summaries are inferred
// from names and signatures only - confirm against the implementation.

// Takes a caller-provided IBVSocket plus a cm_id and comm context; returns nothing.
extern void __IBVSocket_initFromCommContext(IBVSocket* _this, struct rdma_cm_id* cm_id,
IBVCommContext* commContext);
// Same inputs without a caller-provided socket; returns an IBVSocket pointer.
// NOTE(review): ownership of the returned socket is not visible here - confirm.
extern IBVSocket* __IBVSocket_constructFromCommContext(struct rdma_cm_id* cm_id,
IBVCommContext* commContext);
// Buffer registration helpers.
// NOTE(review): summaries inferred from names and signatures only - confirm against the
// implementation (including the meaning of the int return value and who frees what).

// Takes an existing buffer (buf, bufLen) and hands back a memory region through outMR.
extern int __IBVSocket_registerBuf(IBVCommContext* commContext, void* buf, size_t bufLen,
struct ibv_mr **outMR);
// Returns a char* buffer of the requested bufLen and hands back a memory region through
// outMR.
extern char* __IBVSocket_allocAndRegisterBuf(IBVCommContext* commContext, size_t bufLen,
struct ibv_mr **outMR);
// Comm context lifecycle and IBVCommDest (handshake data) helpers.
// NOTE(review): summaries inferred from names and signatures only - confirm against the
// implementation.

// Hands back a comm context through outCommContext, based on commCfg; returns bool
// (presumably true on success).
extern bool __IBVSocket_createCommContext(IBVSocket* _this, struct rdma_cm_id* cm_id,
IBVCommConfig* commCfg, IBVCommContext** outCommContext);
// Counterpart of createCommContext for teardown.
extern void __IBVSocket_cleanupCommContext(struct rdma_cm_id* cm_id, IBVCommContext* commContext);
// Fills outDest from the given comm context.
extern void __IBVSocket_initCommDest(IBVCommContext* commContext, IBVCommDest* outDest);
// Parses a raw buffer (buf, bufLen) and hands back an IBVCommDest through outDest; returns
// bool (presumably false if the buffer is not a valid IBVCommDest).
// NOTE(review): outDest is a pointer-to-pointer, so allocation/ownership needs checking.
extern bool __IBVSocket_parseCommDest(const void* buf, size_t bufLen, IBVCommDest** outDest);
// Work request posting helpers.
// NOTE(review): summaries inferred from names and signatures only - confirm against the
// implementation (including the meaning of the int return values).

// Posts a receive for the buffer selected by bufIndex.
extern int __IBVSocket_postRecv(IBVSocket* _this, IBVCommContext* commContext, size_t bufIndex);
// RDMA write/read variants that take the remote side as an IBVCommDest and the local side
// as a memory region plus buffer pointer and length.
extern int __IBVSocket_postWrite(IBVSocket* _this, IBVCommDest* remoteDest,
struct ibv_mr* localMR, char* localBuf, int bufLen);
extern int __IBVSocket_postRead(IBVSocket* _this, IBVCommDest* remoteDest,
struct ibv_mr* localMR, char* localBuf, int bufLen);
#ifdef BEEGFS_NVFS
// Overloads (same names, different parameter lists - this requires C++) that take the
// local key, remote address and remote key directly instead of IBVCommDest/ibv_mr.
extern int __IBVSocket_postWrite(IBVSocket* _this, char* localBuf, int bufLen, unsigned lkey,
uint64_t remoteBuf, unsigned rkey);
extern int __IBVSocket_postRead(IBVSocket* _this, char* localBuf, int bufLen, unsigned lkey,
uint64_t remoteBuf, unsigned rkey);
#endif /* BEEGFS_NVFS */
// Posts a send of bufLen bytes from the buffer selected by bufIndex.
extern int __IBVSocket_postSend(IBVSocket* _this, size_t bufIndex, int bufLen);
// Completion handling and flow control helpers.
// NOTE(review): summaries inferred from names and signatures only - confirm against the
// implementation (including the meaning of the int return values and timeout semantics).

// Hands back a work completion through outWC; timeoutMS is presumably in milliseconds.
extern int __IBVSocket_recvWC(IBVSocket* _this, int timeoutMS, struct ibv_wc* outWC);
// Flow control hook for the receive path.
extern int __IBVSocket_flowControlOnRecv(IBVSocket* _this, int timeoutMS);
// Flow control hooks for the send path: counter update (no result) and wait with timeout.
extern void __IBVSocket_flowControlOnSendUpdateCounters(IBVSocket* _this);
extern int __IBVSocket_flowControlOnSendWait(IBVSocket* _this, int timeoutMS);
// Waits for a recv completion event and hands back a work completion through outWC.
extern int __IBVSocket_waitForRecvCompletionEvent(IBVSocket* _this, int timeoutMS,
struct ibv_wc* outWC);
// Waits for the given numbers of send, write and read elements to complete.
extern int __IBVSocket_waitForTotalSendCompletion(IBVSocket* _this,
int numSendElements, int numWriteElements, int numReadElements);
// NOTE(review): presumably waits until IBVCommContext::numUsedSendBufs has been reset.
extern int __IBVSocket_waitForUsedSendBufsReset(IBVSocket* _this);
// Remaining internal helpers.
// NOTE(review): summaries inferred from names and signatures only - confirm against the
// implementation.

// Copies into the caller's buffer (buf, bufLen) and returns a ssize_t; presumably continues
// the receive tracked in IBVCommContext::incompleteRecv.
extern ssize_t __IBVSocket_recvContinueIncomplete(IBVSocket* _this,
char* buf, size_t bufLen);
// Connection teardown helpers.
extern void __IBVSocket_disconnect(IBVSocket* _this);
extern void __IBVSocket_close(IBVSocket* _this);
// Presumably sets up IBVSocket::epollFD; returns bool.
extern bool __IBVSocket_initEpollFD(IBVSocket* _this);
// Maps a work completion status code to a string.
// NOTE(review): lifetime of the returned string is not visible here - confirm.
extern const char* __IBVSocket_wcStatusStr(int wcStatusCode);
// State of a receive that has not been fully handed to the caller yet (embedded in
// IBVCommContext as incompleteRecv).
// NOTE(review): field meanings below are inferred from names - confirm against the
// implementation.
struct IBVIncompleteRecv
{
int isAvailable; // presumably non-zero while wc describes a pending receive
int completedOffset; // presumably the number of bytes already consumed from the buffer
struct ibv_wc wc; // work completion of the receive
};
// Send-side counterpart of IBVIncompleteRecv (embedded in IBVCommContext as incompleteSend).
struct IBVIncompleteSend
{
unsigned numAvailable; // NOTE(review): unit (buffers? work requests?) not visible here - confirm
};
// Timeout settings stored per socket (IBVSocket::timeoutCfg). The field names match the
// parameters of IBVSocket_setTimeouts(); the "MS" suffix suggests milliseconds.
struct IBVTimeoutConfig
{
int connectMS;
int flowSendMS;
int pollMS;
};
// Per-connection ibverbs state: device and protection domain handles, registered memory
// regions, completion channel/queues, the queue pair, the send/recv buffer pools and the
// flow control counters.
struct IBVCommContext
{
struct ibv_context* context; // ibverbs device context
struct ibv_pd* pd; // protection domain
struct ibv_mr* recvMR; // recvBuf mem region
struct ibv_mr* sendMR; // sendBuf mem region
struct ibv_mr* controlMR; // flow/flood control mem region
struct ibv_mr* controlResetMR; // flow/flood control reset mem region
struct ibv_comp_channel* recvCompChannel; // recv completion event channel
unsigned numUnackedRecvCompChannelEvents; // number of gathered events
struct ibv_cq* recvCQ; // recv completion queue
struct ibv_cq* sendCQ; // send completion queue
struct ibv_qp* qp; // send+recv queue pair
IBVCommConfig commCfg; // buffer count/size and service level (stored by value)
char* recvBuf; // large alloc'ed and reg'ed buffer for recvBufs
char** recvBufs; // points to chunks inside recvBuf
char* sendBuf; // large alloc'ed and reg'ed buffer for sendBufs
char** sendBufs; // points to chunks inside sendBuf
// NOTE(review): volatile suggests the two counters below are modified outside the normal
// program flow (e.g. through controlMR/controlResetMR) - confirm. volatile alone does not
// provide thread synchronization.
volatile uint64_t numUsedSendBufs; // sender's flow/flood control counter (volatile!!)
volatile uint64_t numUsedSendBufsReset; // flow/flood control reset value
uint64_t numUsedRecvBufs; // receiver's flow/flood control (reset) counter
unsigned numReceivedBufsLeft; // flow control v2 to avoid IB rnr timeout
unsigned numSendBufsLeft; // flow control v2 to avoid IB rnr timeout
IBVIncompleteRecv incompleteRecv; // state of a partially consumed receive
IBVIncompleteSend incompleteSend; // send-side counterpart
#ifdef BEEGFS_NVFS
// NVFS-only state.
// NOTE(review): meanings inferred from names and types - confirm against the implementation.
uint64_t wr_id; // presumably the next/last work request id used for NVFS transfers
Mutex *cqMutex; // presumably guards cqCompletions (and/or completion queue polling)
CQMap *cqCompletions; // uint64_t -> int map (see CQMap)
MRMap *workerMRs; // buffer address -> ibv_mr map (see MRMap)
#endif /* BEEGFS_NVFS */
};
// Packed to 1-byte alignment so that no padding is inserted between or after the fields.
#pragma pack(push, 1)
// Note: Make sure this struct has the same size on all architectures (because we use
// sizeof(IBVCommDest) for private_data during handshake)
// NOTE(review): rkey, recvBufNum and recvBufSize are plain "unsigned"; the struct is
// 8 + 8 + 8 + 3*4 = 36 bytes only where unsigned is 4 bytes wide. Byte order of the fields
// is not handled by this declaration - confirm how the implementation deals with it.
struct IBVCommDest
{
char verificationStr[IBVSOCKET_PRIVATEDATA_STR_LEN]; // presumably holds IBVSOCKET_PRIVATEDATA_STR
uint64_t protocolVersion; // presumably IBVSOCKET_PRIVATEDATA_PROTOCOL_VER
uint64_t vaddr; // 64-bit address; presumably of the sender's RDMA-accessible buffer
unsigned rkey; // presumably the remote key for vaddr
unsigned recvBufNum; // presumably the sender's number of recv buffers
unsigned recvBufSize; // presumably the size of each of the sender's recv buffers
};
#pragma pack(pop)
// The socket object behind the public IBVSocket_* interface: rdma_cm handles, the local
// and remote handshake data, the comm context and per-socket settings.
// NOTE(review): field comments added below are inferred from names and types unless they
// were already present - confirm against the implementation.
struct IBVSocket
{
struct rdma_event_channel* cm_channel; // rdma_cm event channel
struct rdma_cm_id* cm_id; // rdma_cm identifier of this socket
IBVCommDest localDest; // local handshake data (stored by value)
IBVCommDest* remoteDest; // peer's handshake data (pointer; ownership not visible here)
IBVCommContext* commContext; // ibverbs resources of the connection
int epollFD; // only for connected sockets, invalid (-1) for listeners
bool sockValid; // read through IBVSocket_getSockValid() by name
int errState; // presumably non-zero after an error
CmEventQueue* delayedCmEventsQ; // queue of rdma_cm events kept for later processing
uint8_t typeOfService; // name matches IBVSocket_setTypeOfService()
unsigned connectionRejectionRate; // testing aid, name matches IBVSocket_setConnectionRejectionRate()
unsigned connectionRejectionCount; // testing aid, presumably counts towards the rate above
IBVTimeoutConfig timeoutCfg; // connect/flowSend/poll timeouts
struct in_addr bindIP; // presumably the local address the socket is bound to
};

View File

@@ -0,0 +1,82 @@
#pragma once
#include <arpa/inet.h>
/*
* This is the interface of the ibverbs socket abstraction.
*/
// IBVSocket is only forward-declared here, so users of this interface handle it through
// pointers. IBVCommConfig is forward-declared for the prototypes below and defined at the
// end of this header.
struct IBVSocket;
typedef struct IBVSocket IBVSocket;
struct IBVCommConfig;
typedef struct IBVCommConfig IBVCommConfig;
// Result type of IBVSocket_accept().
// NOTE(review): ACCEPTRES_IGNORE presumably means "event handled, but no new connection
// was produced" - confirm against the implementation.
enum IBVSocket_AcceptRes
{ACCEPTRES_ERR=0, ACCEPTRES_IGNORE=1, ACCEPTRES_SUCCESS=2};
typedef enum IBVSocket_AcceptRes IBVSocket_AcceptRes;
// construction/destruction
// init/uninit take a caller-provided IBVSocket; construct/destruct return/take a socket
// pointer.
// NOTE(review): presumably construct allocates and destruct frees the object - confirm
// against the implementation.
extern void IBVSocket_init(IBVSocket* _this);
// Explicit (void) parameter list, consistent with the other no-argument prototypes in this
// header (an empty list would not be a prototype when compiled as C).
extern IBVSocket* IBVSocket_construct(void);
extern void IBVSocket_uninit(IBVSocket* _this);
extern void IBVSocket_destruct(IBVSocket* _this);
// static
// Functions that do not operate on a socket instance.
// NOTE(review): summaries inferred from names only - confirm against the implementation.
// Reports whether RDMA devices are present.
extern bool IBVSocket_rdmaDevicesExist(void);
// Presumably performs one-time fork-related initialization of the verbs library.
extern void IBVSocket_fork_init_once(void);
// methods
// NOTE(review): the definitions are not part of this header; the summaries are inferred
// from names and signatures only - confirm against the implementation (in particular the
// meaning of bool/int/ssize_t return values and the byte order expected for "port").

// Connect to a peer given by host name or by IPv4 address, using the buffer settings in
// commCfg.
extern bool IBVSocket_connectByName(IBVSocket* _this, const char* hostname, unsigned short port,
IBVCommConfig* commCfg);
extern bool IBVSocket_connectByIP(IBVSocket* _this, struct in_addr ipaddress, unsigned short port,
IBVCommConfig* commCfg);
// Bind to a port, optionally with an explicit local IPv4 address.
extern bool IBVSocket_bind(IBVSocket* _this, unsigned short port);
extern bool IBVSocket_bindToAddr(IBVSocket* _this, in_addr_t ipAddr, unsigned short port);
extern bool IBVSocket_listen(IBVSocket* _this);
// Returns an IBVSocket_AcceptRes; hands back the accepted socket through outAcceptedSock
// and the peer address through peerAddr/peerAddrLen.
extern IBVSocket_AcceptRes IBVSocket_accept(IBVSocket* _this, IBVSocket** outAcceptedSock,
struct sockaddr* peerAddr, socklen_t* peerAddrLen);
extern bool IBVSocket_shutdown(IBVSocket* _this);
#ifdef BEEGFS_NVFS
// RDMA write/read between a local buffer (buf, bufLen, lkey) and a remote buffer
// (rbuf, rkey).
// NOTE(review): IBVSocket_read takes "const char* buf" although a read would be expected
// to store data into the local buffer - confirm the const qualifier is intended.
extern ssize_t IBVSocket_write(IBVSocket* _this, const char* buf, size_t bufLen, unsigned lkey,
const uint64_t rbuf, unsigned rkey);
extern ssize_t IBVSocket_read(IBVSocket* _this, const char* buf, size_t bufLen, unsigned lkey,
const uint64_t rbuf, unsigned rkey);
#endif /* BEEGFS_NVFS */
// Receive into buf; the T variant additionally takes a timeout (presumably milliseconds).
extern ssize_t IBVSocket_recv(IBVSocket* _this, char* buf, size_t bufLen, int flags);
extern ssize_t IBVSocket_recvT(IBVSocket* _this, char* buf, size_t bufLen, int flags,
int timeoutMS);
// Send bufLen bytes from buf.
extern ssize_t IBVSocket_send(IBVSocket* _this, const char* buf, size_t bufLen, int flags);
extern int IBVSocket_checkConnection(IBVSocket* _this);
// Presumably checks for incoming data without blocking.
extern ssize_t IBVSocket_nonblockingRecvCheck(IBVSocket* _this);
// Presumably processes events queued in IBVSocket::delayedCmEventsQ.
extern bool IBVSocket_checkDelayedEvents(IBVSocket* _this);
// getters & setters
// NOTE(review): summaries inferred from names and signatures only - confirm against the
// implementation.
extern bool IBVSocket_getSockValid(IBVSocket* _this);
// File descriptors, presumably intended for poll/epoll by the caller.
extern int IBVSocket_getRecvCompletionFD(IBVSocket* _this);
extern int IBVSocket_getConnManagerFD(IBVSocket* _this);
extern void IBVSocket_setTypeOfService(IBVSocket* _this, uint8_t typeOfService);
// Parameter names match the fields of IBVTimeoutConfig; "MS" suggests milliseconds.
extern void IBVSocket_setTimeouts(IBVSocket* _this, int connectMS, int flowSendMS,
int pollMS);
// testing methods
// NOTE(review): presumably used to make the socket reject a share of incoming connections
// for test purposes; how "rate" is interpreted is not visible here - confirm.
extern void IBVSocket_setConnectionRejectionRate(IBVSocket* _this, unsigned rate);
extern bool IBVSocket_connectionRejection(IBVSocket* _this);
// Buffer and service level settings passed to IBVSocket_connectByName() and
// IBVSocket_connectByIP().
struct IBVCommConfig
{
unsigned bufNum; // number of available buffers
unsigned bufSize; // size of each buffer
uint8_t serviceLevel; // NOTE(review): presumably the InfiniBand service level (SL) - confirm
};