New upstream version 8.1.0
This commit is contained in:
2503
common/ib_lib/net/sock/ibvsocket/IBVSocket.cpp
Normal file
2503
common/ib_lib/net/sock/ibvsocket/IBVSocket.cpp
Normal file
File diff suppressed because it is too large
Load Diff
200
common/ib_lib/net/sock/ibvsocket/IBVSocket.h
Normal file
200
common/ib_lib/net/sock/ibvsocket/IBVSocket.h
Normal file
@@ -0,0 +1,200 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/toolkit/serialization/Serialization.h>
|
||||
#include <common/Common.h>
|
||||
#include <net/sock/ibvsocket/OpenTk_IBVSocket.h>
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/poll.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netdb.h>
|
||||
#include <queue>
|
||||
|
||||
|
||||
|
||||
#include <infiniband/verbs.h>
|
||||
#include <rdma/rdma_cma.h>
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
#include <common/threading/Mutex.h>
|
||||
#include <unordered_map>
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
// Work IDs. Each value is defined as "previous + 1", which evaluates to:
// recv offset = 1, send offset = 2, write id = 3, read id = 4.
#define IBVSOCKET_RECV_WORK_ID_OFFSET        (1)
#define IBVSOCKET_SEND_WORK_ID_OFFSET        (1 + IBVSOCKET_RECV_WORK_ID_OFFSET)
#define IBVSOCKET_WRITE_WORK_ID              (1 + IBVSOCKET_SEND_WORK_ID_OFFSET)
#define IBVSOCKET_READ_WORK_ID               (1 + IBVSOCKET_WRITE_WORK_ID)

#define IBVSOCKET_EVENTS_GATHER_NUM          (64)

// Verification string for the connection private data. The literal is 7 characters
// plus the terminating NUL, i.e. 8 bytes in total.
#define IBVSOCKET_PRIVATEDATA_STR            "fhgfs0 " // must be exactly(!!) 8 bytes long
#define IBVSOCKET_PRIVATEDATA_STR_LEN        8
#define IBVSOCKET_PRIVATEDATA_PROTOCOL_VER   1

// Enforce the "exactly 8 bytes" rule at compile time instead of relying on the comment.
static_assert(sizeof(IBVSOCKET_PRIVATEDATA_STR) == IBVSOCKET_PRIVATEDATA_STR_LEN,
   "IBVSOCKET_PRIVATEDATA_STR (incl. terminating NUL) must match IBVSOCKET_PRIVATEDATA_STR_LEN");
|
||||
|
||||
|
||||
// Forward declarations of the structs defined further down in this header, each with a
// same-named typedef so they can be referred to without the "struct" keyword.
struct IBVIncompleteRecv;
typedef struct IBVIncompleteRecv IBVIncompleteRecv;
struct IBVIncompleteSend;
typedef struct IBVIncompleteSend IBVIncompleteSend;

struct IBVCommContext;
typedef struct IBVCommContext IBVCommContext;

struct IBVCommDest;
typedef struct IBVCommDest IBVCommDest;

// FIFO of rdma connection manager event pointers (used for IBVSocket::delayedCmEventsQ).
typedef std::queue<struct rdma_cm_event*> CmEventQueue;
#ifdef BEEGFS_NVFS
// Buffer address -> memory region, and 64-bit id -> int. Only available in NVFS builds.
typedef std::unordered_map<char *, struct ibv_mr *> MRMap;
typedef std::unordered_map<uint64_t, int> CQMap;
#endif /* BEEGFS_NVFS */
|
||||
|
||||
|
||||
extern void __IBVSocket_initFromCommContext(IBVSocket* _this, struct rdma_cm_id* cm_id,
|
||||
IBVCommContext* commContext);
|
||||
extern IBVSocket* __IBVSocket_constructFromCommContext(struct rdma_cm_id* cm_id,
|
||||
IBVCommContext* commContext);
|
||||
|
||||
|
||||
extern int __IBVSocket_registerBuf(IBVCommContext* commContext, void* buf, size_t bufLen,
|
||||
struct ibv_mr **outMR);
|
||||
extern char* __IBVSocket_allocAndRegisterBuf(IBVCommContext* commContext, size_t bufLen,
|
||||
struct ibv_mr **outMR);
|
||||
|
||||
extern bool __IBVSocket_createCommContext(IBVSocket* _this, struct rdma_cm_id* cm_id,
|
||||
IBVCommConfig* commCfg, IBVCommContext** outCommContext);
|
||||
extern void __IBVSocket_cleanupCommContext(struct rdma_cm_id* cm_id, IBVCommContext* commContext);
|
||||
|
||||
extern void __IBVSocket_initCommDest(IBVCommContext* commContext, IBVCommDest* outDest);
|
||||
extern bool __IBVSocket_parseCommDest(const void* buf, size_t bufLen, IBVCommDest** outDest);
|
||||
|
||||
extern int __IBVSocket_postRecv(IBVSocket* _this, IBVCommContext* commContext, size_t bufIndex);
|
||||
extern int __IBVSocket_postWrite(IBVSocket* _this, IBVCommDest* remoteDest,
|
||||
struct ibv_mr* localMR, char* localBuf, int bufLen);
|
||||
extern int __IBVSocket_postRead(IBVSocket* _this, IBVCommDest* remoteDest,
|
||||
struct ibv_mr* localMR, char* localBuf, int bufLen);
|
||||
#ifdef BEEGFS_NVFS
|
||||
extern int __IBVSocket_postWrite(IBVSocket* _this, char* localBuf, int bufLen, unsigned lkey,
|
||||
uint64_t remoteBuf, unsigned rkey);
|
||||
extern int __IBVSocket_postRead(IBVSocket* _this, char* localBuf, int bufLen, unsigned lkey,
|
||||
uint64_t remoteBuf, unsigned rkey);
|
||||
#endif /* BEEGFS_NVFS */
|
||||
extern int __IBVSocket_postSend(IBVSocket* _this, size_t bufIndex, int bufLen);
|
||||
extern int __IBVSocket_recvWC(IBVSocket* _this, int timeoutMS, struct ibv_wc* outWC);
|
||||
|
||||
extern int __IBVSocket_flowControlOnRecv(IBVSocket* _this, int timeoutMS);
|
||||
extern void __IBVSocket_flowControlOnSendUpdateCounters(IBVSocket* _this);
|
||||
extern int __IBVSocket_flowControlOnSendWait(IBVSocket* _this, int timeoutMS);
|
||||
|
||||
extern int __IBVSocket_waitForRecvCompletionEvent(IBVSocket* _this, int timeoutMS,
|
||||
struct ibv_wc* outWC);
|
||||
extern int __IBVSocket_waitForTotalSendCompletion(IBVSocket* _this,
|
||||
int numSendElements, int numWriteElements, int numReadElements);
|
||||
extern int __IBVSocket_waitForUsedSendBufsReset(IBVSocket* _this);
|
||||
|
||||
extern ssize_t __IBVSocket_recvContinueIncomplete(IBVSocket* _this,
|
||||
char* buf, size_t bufLen);
|
||||
|
||||
extern void __IBVSocket_disconnect(IBVSocket* _this);
|
||||
extern void __IBVSocket_close(IBVSocket* _this);
|
||||
|
||||
extern bool __IBVSocket_initEpollFD(IBVSocket* _this);
|
||||
|
||||
extern const char* __IBVSocket_wcStatusStr(int wcStatusCode);
|
||||
|
||||
struct IBVIncompleteRecv
|
||||
{
|
||||
int isAvailable;
|
||||
int completedOffset;
|
||||
struct ibv_wc wc;
|
||||
};
|
||||
|
||||
// Bookkeeping for sends that are not yet completed.
// NOTE(review): exact semantics of the counter are defined by its users (not visible here).
struct IBVIncompleteSend
{
   unsigned numAvailable;
};
|
||||
|
||||
// Timeout settings of a socket; the "MS" suffix suggests all values are in milliseconds.
// Matches the three arguments of IBVSocket_setTimeouts().
struct IBVTimeoutConfig
{
   int connectMS;
   int flowSendMS;
   int pollMS;
};
|
||||
|
||||
struct IBVCommContext
|
||||
{
|
||||
struct ibv_context* context;
|
||||
struct ibv_pd* pd; // protection domain
|
||||
struct ibv_mr* recvMR; // recvBuf mem region
|
||||
struct ibv_mr* sendMR; // sendBuf mem region
|
||||
struct ibv_mr* controlMR; // flow/flood control mem region
|
||||
struct ibv_mr* controlResetMR; // flow/flood control reset mem region
|
||||
|
||||
struct ibv_comp_channel* recvCompChannel; // recv completion event channel
|
||||
unsigned numUnackedRecvCompChannelEvents; // number of gathered events
|
||||
|
||||
struct ibv_cq* recvCQ; // recv completion queue
|
||||
struct ibv_cq* sendCQ; // send completion queue
|
||||
struct ibv_qp* qp; // send+recv queue pair
|
||||
|
||||
IBVCommConfig commCfg;
|
||||
char* recvBuf; // large alloc'ed and reg'ed buffer for recvBufs
|
||||
char** recvBufs; // points to chunks inside recvBuf
|
||||
char* sendBuf; // large alloc'ed and reg'ed buffer for sendBufs
|
||||
char** sendBufs; // points to chunks inside sendBuf
|
||||
volatile uint64_t numUsedSendBufs; // sender's flow/flood control counter (volatile!!)
|
||||
volatile uint64_t numUsedSendBufsReset; // flow/flood control reset value
|
||||
uint64_t numUsedRecvBufs; // receiver's flow/flood control (reset) counter
|
||||
unsigned numReceivedBufsLeft; // flow control v2 to avoid IB rnr timeout
|
||||
unsigned numSendBufsLeft; // flow control v2 to avoid IB rnr timeout
|
||||
|
||||
IBVIncompleteRecv incompleteRecv;
|
||||
IBVIncompleteSend incompleteSend;
|
||||
#ifdef BEEGFS_NVFS
|
||||
uint64_t wr_id;
|
||||
Mutex *cqMutex;
|
||||
CQMap *cqCompletions;
|
||||
MRMap *workerMRs;
|
||||
#endif /* BEEGFS_NVFS */
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
// Note: Make sure this struct has the same size on all architectures (because we use
|
||||
// sizeof(IBVCommDest) for private_data during handshake)
|
||||
struct IBVCommDest
|
||||
{
|
||||
char verificationStr[IBVSOCKET_PRIVATEDATA_STR_LEN];
|
||||
uint64_t protocolVersion;
|
||||
uint64_t vaddr;
|
||||
unsigned rkey;
|
||||
unsigned recvBufNum;
|
||||
unsigned recvBufSize;
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
struct IBVSocket
|
||||
{
|
||||
struct rdma_event_channel* cm_channel;
|
||||
struct rdma_cm_id* cm_id;
|
||||
|
||||
IBVCommDest localDest;
|
||||
IBVCommDest* remoteDest;
|
||||
|
||||
IBVCommContext* commContext;
|
||||
int epollFD; // only for connected sockets, invalid (-1) for listeners
|
||||
|
||||
bool sockValid;
|
||||
int errState;
|
||||
|
||||
CmEventQueue* delayedCmEventsQ;
|
||||
|
||||
uint8_t typeOfService;
|
||||
|
||||
unsigned connectionRejectionRate;
|
||||
unsigned connectionRejectionCount;
|
||||
|
||||
IBVTimeoutConfig timeoutCfg;
|
||||
struct in_addr bindIP;
|
||||
};
|
||||
|
||||
|
||||
82
common/ib_lib/net/sock/ibvsocket/OpenTk_IBVSocket.h
Normal file
82
common/ib_lib/net/sock/ibvsocket/OpenTk_IBVSocket.h
Normal file
@@ -0,0 +1,82 @@
|
||||
#pragma once
|
||||
|
||||
#include <arpa/inet.h>
|
||||
|
||||
|
||||
/*
|
||||
* This is the interface of the ibverbs socket abstraction.
|
||||
*/
|
||||
|
||||
|
||||
// Opaque handle types of this interface; users only deal with pointers to IBVSocket.
struct IBVSocket;
typedef struct IBVSocket IBVSocket;

struct IBVCommConfig;
typedef struct IBVCommConfig IBVCommConfig;
|
||||
|
||||
|
||||
// Result codes returned by IBVSocket_accept().
enum IBVSocket_AcceptRes
{
   ACCEPTRES_ERR=0,
   ACCEPTRES_IGNORE=1,
   ACCEPTRES_SUCCESS=2
};
typedef enum IBVSocket_AcceptRes IBVSocket_AcceptRes;
|
||||
|
||||
|
||||
// construction/destruction
|
||||
extern void IBVSocket_init(IBVSocket* _this);
|
||||
extern IBVSocket* IBVSocket_construct();
|
||||
extern void IBVSocket_uninit(IBVSocket* _this);
|
||||
extern void IBVSocket_destruct(IBVSocket* _this);
|
||||
|
||||
// static
|
||||
extern bool IBVSocket_rdmaDevicesExist(void);
|
||||
extern void IBVSocket_fork_init_once(void);
|
||||
|
||||
// methods
|
||||
extern bool IBVSocket_connectByName(IBVSocket* _this, const char* hostname, unsigned short port,
|
||||
IBVCommConfig* commCfg);
|
||||
extern bool IBVSocket_connectByIP(IBVSocket* _this, struct in_addr ipaddress, unsigned short port,
|
||||
IBVCommConfig* commCfg);
|
||||
extern bool IBVSocket_bind(IBVSocket* _this, unsigned short port);
|
||||
extern bool IBVSocket_bindToAddr(IBVSocket* _this, in_addr_t ipAddr, unsigned short port);
|
||||
extern bool IBVSocket_listen(IBVSocket* _this);
|
||||
extern IBVSocket_AcceptRes IBVSocket_accept(IBVSocket* _this, IBVSocket** outAcceptedSock,
|
||||
struct sockaddr* peerAddr, socklen_t* peerAddrLen);
|
||||
extern bool IBVSocket_shutdown(IBVSocket* _this);
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
extern ssize_t IBVSocket_write(IBVSocket* _this, const char* buf, size_t bufLen, unsigned lkey,
|
||||
const uint64_t rbuf, unsigned rkey);
|
||||
extern ssize_t IBVSocket_read(IBVSocket* _this, const char* buf, size_t bufLen, unsigned lkey,
|
||||
const uint64_t rbuf, unsigned rkey);
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
extern ssize_t IBVSocket_recv(IBVSocket* _this, char* buf, size_t bufLen, int flags);
|
||||
extern ssize_t IBVSocket_recvT(IBVSocket* _this, char* buf, size_t bufLen, int flags,
|
||||
int timeoutMS);
|
||||
extern ssize_t IBVSocket_send(IBVSocket* _this, const char* buf, size_t bufLen, int flags);
|
||||
|
||||
extern int IBVSocket_checkConnection(IBVSocket* _this);
|
||||
extern ssize_t IBVSocket_nonblockingRecvCheck(IBVSocket* _this);
|
||||
extern bool IBVSocket_checkDelayedEvents(IBVSocket* _this);
|
||||
|
||||
|
||||
// getters & setters
|
||||
extern bool IBVSocket_getSockValid(IBVSocket* _this);
|
||||
extern int IBVSocket_getRecvCompletionFD(IBVSocket* _this);
|
||||
extern int IBVSocket_getConnManagerFD(IBVSocket* _this);
|
||||
extern void IBVSocket_setTypeOfService(IBVSocket* _this, uint8_t typeOfService);
|
||||
extern void IBVSocket_setTimeouts(IBVSocket* _this, int connectMS, int flowSendMS,
|
||||
int pollMS);
|
||||
|
||||
// testing methods
|
||||
extern void IBVSocket_setConnectionRejectionRate(IBVSocket* _this, unsigned rate);
|
||||
extern bool IBVSocket_connectionRejection(IBVSocket* _this);
|
||||
|
||||
|
||||
// Buffer and service level settings that callers pass to IBVSocket_connectByName() /
// IBVSocket_connectByIP().
struct IBVCommConfig
{
   unsigned bufNum; // number of available buffers
   unsigned bufSize; // size of each buffer
   uint8_t serviceLevel;
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user