New upstream version 8.1.0
This commit is contained in:
342
common/ib_lib/RDMASocketImpl.cpp
Normal file
342
common/ib_lib/RDMASocketImpl.cpp
Normal file
@@ -0,0 +1,342 @@
|
||||
#include <common/app/AbstractApp.h>
|
||||
#include <common/system/System.h>
|
||||
#include <common/threading/PThread.h>
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include "RDMASocketImpl.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
|
||||
/**
 * Factory function stored in beegfs_socket_impl: allocates a default-constructed RDMASocketImpl.
 *
 * @return newly allocated socket object; the caller receives the only pointer to it
 * @throw SocketException if the RDMASocketImpl default constructor throws
 */
static RDMASocket* new_rdma_socket()
{
   RDMASocket* newSock = new RDMASocketImpl();

   return newSock;
}
|
||||
|
||||
/**
 * Callback table of type RDMASocket::ImplCallbacks, filled positionally.
 * NOTE(review): the member order of ImplCallbacks is declared in RDMASocket.h (not visible
 * here); the entries must stay in that order.
 */
RDMASocket::ImplCallbacks beegfs_socket_impl =
{
   IBVSocket_rdmaDevicesExist, // declared as "static" helper in OpenTk_IBVSocket.h
   IBVSocket_fork_init_once,   // declared as "static" helper in OpenTk_IBVSocket.h
   new_rdma_socket             // factory for RDMASocketImpl objects (see above)
};
|
||||
|
||||
|
||||
// Note: Good tradeoff between throughput and mem usage (for SDR IB):
//    buf_num=64; buf_size=4*1024 (=> 512kB per socket for send and recv)
// The values below are only the initial settings of a new socket (128 buffers of 4kB each);
// they are copied into commCfg by the default constructor.

#define RDMASOCKET_DEFAULT_BUF_NUM    (128)    // moved to config
#define RDMASOCKET_DEFAULT_BUF_SIZE   (4*1024) // moved to config

// Parenthesized and without a trailing semicolon, so that the macro expands to a plain
// expression and can be used anywhere a value is expected.
#define RDMASOCKET_DEFAULT_SL         (0)
|
||||
|
||||
|
||||
/**
|
||||
* Note: Did you notice the rdmaForkInitOnce() method?
|
||||
*
|
||||
* @throw SocketException
|
||||
*/
|
||||
RDMASocketImpl::RDMASocketImpl()
|
||||
{
|
||||
this->sockType = NICADDRTYPE_RDMA;
|
||||
|
||||
commCfg.bufNum = RDMASOCKET_DEFAULT_BUF_NUM;
|
||||
commCfg.bufSize = RDMASOCKET_DEFAULT_BUF_SIZE;
|
||||
commCfg.serviceLevel = RDMASOCKET_DEFAULT_SL;
|
||||
|
||||
this->ibvsock = IBVSocket_construct();
|
||||
|
||||
if(!ibvsock)
|
||||
throw SocketException("RDMASocket allocation failed. SysErr: " + System::getErrString() );
|
||||
|
||||
if(!IBVSocket_getSockValid(this->ibvsock) )
|
||||
{
|
||||
IBVSocket_destruct(this->ibvsock);
|
||||
throw SocketException("RDMASocket initialization failed. SysErr: " + System::getErrString() );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: To be used by accept() only.
|
||||
*
|
||||
* @param sock will be closed/destructed by the destructor of this object
|
||||
*/
|
||||
RDMASocketImpl::RDMASocketImpl(IBVSocket* ibvsock, struct in_addr peerIP, std::string peername)
|
||||
{
|
||||
this->ibvsock = ibvsock;
|
||||
this->fd = IBVSocket_getRecvCompletionFD(ibvsock);
|
||||
|
||||
this->peerIP = peerIP;
|
||||
this->peername = std::move(peername);
|
||||
|
||||
this->sockType = NICADDRTYPE_RDMA;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Destructs the wrapped IBVSocket (if there is one).
 */
RDMASocketImpl::~RDMASocketImpl()
{
   if(!ibvsock)
      return;

   IBVSocket_destruct(ibvsock);
}
|
||||
|
||||
/**
|
||||
* @throw SocketException
|
||||
*/
|
||||
void RDMASocketImpl::connect(const char* hostname, unsigned short port)
|
||||
{
|
||||
Socket::connect(hostname, port, AF_UNSPEC, SOCK_STREAM);
|
||||
}
|
||||
|
||||
/**
|
||||
* @throw SocketException
|
||||
*/
|
||||
void RDMASocketImpl::connect(const struct sockaddr* serv_addr, socklen_t addrlen)
|
||||
{
|
||||
unsigned short peerPort = ntohs( ( (struct sockaddr_in*)serv_addr)->sin_port );
|
||||
|
||||
this->peerIP = ( (struct sockaddr_in*)serv_addr)->sin_addr;
|
||||
|
||||
// set peername if not done so already (e.g. by connect(hostname) )
|
||||
|
||||
if(peername.empty() )
|
||||
peername = Socket::endpointAddrToStr(peerIP, peerPort);
|
||||
|
||||
bool connRes = IBVSocket_connectByIP(ibvsock, peerIP, peerPort, &commCfg);
|
||||
if(!connRes)
|
||||
throw SocketConnectException(
|
||||
std::string("RDMASocket unable to connect to: ") + std::string(peername) );
|
||||
|
||||
|
||||
this->fd = IBVSocket_getRecvCompletionFD(ibvsock);
|
||||
}
|
||||
|
||||
/**
|
||||
* @throw SocketException
|
||||
*/
|
||||
void RDMASocketImpl::bindToAddr(in_addr_t ipAddr, unsigned short port)
|
||||
{
|
||||
bool bindRes = IBVSocket_bindToAddr(ibvsock, ipAddr, port);
|
||||
if(!bindRes)
|
||||
throw SocketException("RDMASocket unable to bind to port: " +
|
||||
StringTk::uintToStr(port) );
|
||||
this->bindIP.s_addr = ipAddr;
|
||||
this->bindPort = port;
|
||||
}
|
||||
|
||||
/**
|
||||
* @throw SocketException
|
||||
*/
|
||||
void RDMASocketImpl::listen()
|
||||
{
|
||||
bool listenRes = IBVSocket_listen(ibvsock);
|
||||
if(!listenRes)
|
||||
throw SocketException(std::string("RDMASocket unable to listen.") );
|
||||
|
||||
this->fd = IBVSocket_getConnManagerFD(ibvsock);
|
||||
peername = std::string("Listen(Port: ") + StringTk::uintToStr(bindPort) + std::string(")");
|
||||
}
|
||||
|
||||
/**
|
||||
* @return might return NULL in case an ignored event occurred; consider it to be a kind of false
|
||||
* alert (=> this is not an error)
|
||||
* @throw SocketException
|
||||
*/
|
||||
Socket* RDMASocketImpl::accept(struct sockaddr *addr, socklen_t *addrlen)
|
||||
{
|
||||
IBVSocket* acceptedIBVSocket = NULL;
|
||||
|
||||
IBVSocket_AcceptRes acceptRes = IBVSocket_accept(ibvsock, &acceptedIBVSocket, addr, addrlen);
|
||||
if(acceptRes == ACCEPTRES_IGNORE)
|
||||
return NULL;
|
||||
else
|
||||
if(acceptRes == ACCEPTRES_ERR)
|
||||
throw SocketException(std::string("RDMASocket unable to accept.") );
|
||||
|
||||
// prepare new socket object
|
||||
struct in_addr acceptIP = ( (struct sockaddr_in*)addr)->sin_addr;
|
||||
unsigned short acceptPort = ntohs( ( (struct sockaddr_in*)addr)->sin_port);
|
||||
|
||||
std::string acceptPeername = endpointAddrToStr(acceptIP, acceptPort);
|
||||
|
||||
Socket* acceptedSock = new RDMASocketImpl(acceptedIBVSocket, acceptIP, acceptPeername);
|
||||
|
||||
return acceptedSock;
|
||||
}
|
||||
|
||||
/**
|
||||
* @throw SocketException
|
||||
*/
|
||||
void RDMASocketImpl::shutdown()
|
||||
{
|
||||
bool shutRes = IBVSocket_shutdown(ibvsock);
|
||||
if(!shutRes)
|
||||
throw SocketException(std::string("RDMASocket shutdown failed.") );
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: The RecvDisconnect-part is currently not implemented, so this is equal to the
|
||||
* normal shutdown() method.
|
||||
*
|
||||
* @throw SocketException
|
||||
*/
|
||||
void RDMASocketImpl::shutdownAndRecvDisconnect(int timeoutMS)
|
||||
{
|
||||
this->shutdown();
|
||||
}
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
/**
|
||||
* Note: This is a synchronous (blocking) version
|
||||
*
|
||||
* @throw SocketException
|
||||
*/
|
||||
ssize_t RDMASocketImpl::read(const void *buf, size_t len, unsigned lkey, const uint64_t rbuf, unsigned rkey)
|
||||
{
|
||||
size_t status = IBVSocket_read(this->ibvsock, (char *)buf, len, lkey, rbuf, rkey);
|
||||
return (status == 0) ? len : -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: This is a synchronous (blocking) version
|
||||
*
|
||||
* @throw SocketException
|
||||
*/
|
||||
ssize_t RDMASocketImpl::write(const void *buf, size_t len, unsigned lkey, const uint64_t rbuf, unsigned rkey)
|
||||
{
|
||||
size_t status = IBVSocket_write(this->ibvsock, (char *)buf, len, lkey, rbuf, rkey);
|
||||
return (status == 0) ? len : -1;
|
||||
}
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
/**
|
||||
* Note: This is a synchronous (blocking) version
|
||||
*
|
||||
* @param flags ignored
|
||||
* @throw SocketException
|
||||
*/
|
||||
ssize_t RDMASocketImpl::send(const void *buf, size_t len, int flags)
|
||||
{
|
||||
ssize_t sendRes = IBVSocket_send(ibvsock, (const char*)buf, len, flags | MSG_NOSIGNAL);
|
||||
if(sendRes == (ssize_t)len)
|
||||
{
|
||||
stats->incVals.netSendBytes += len;
|
||||
return sendRes;
|
||||
}
|
||||
else
|
||||
if(sendRes > 0)
|
||||
{
|
||||
throw SocketException(
|
||||
std::string("send(): Sent only ") + StringTk::int64ToStr(sendRes) +
|
||||
std::string(" bytes of the requested ") + StringTk::int64ToStr(len) +
|
||||
std::string(" bytes of data") );
|
||||
}
|
||||
|
||||
throw SocketDisconnectException(
|
||||
"Disconnect during send() to: " + peername);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Note: This is a connection-based socket type, so to and tolen are ignored.
|
||||
*
|
||||
* @param flags ignored
|
||||
* @throw SocketException
|
||||
*/
|
||||
ssize_t RDMASocketImpl::sendto(const void *buf, size_t len, int flags,
|
||||
const struct sockaddr *to, socklen_t tolen)
|
||||
{
|
||||
ssize_t sendRes = IBVSocket_send(ibvsock, (const char*)buf, len, flags | MSG_NOSIGNAL);
|
||||
if(sendRes == (ssize_t)len)
|
||||
{
|
||||
stats->incVals.netSendBytes += len;
|
||||
return sendRes;
|
||||
}
|
||||
else
|
||||
if(sendRes > 0)
|
||||
{
|
||||
throw SocketException(
|
||||
std::string("send(): Sent only ") + StringTk::int64ToStr(sendRes) +
|
||||
std::string(" bytes of the requested ") + StringTk::int64ToStr(len) +
|
||||
std::string(" bytes of data") );
|
||||
}
|
||||
|
||||
throw SocketDisconnectException(
|
||||
std::string("Disconnect during send() to: ") + peername);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param flags ignored
|
||||
* @throw SocketException
|
||||
*/
|
||||
ssize_t RDMASocketImpl::recv(void *buf, size_t len, int flags)
|
||||
{
|
||||
ssize_t recvRes = IBVSocket_recv(ibvsock, (char*)buf, len, flags);
|
||||
if(recvRes > 0)
|
||||
{
|
||||
stats->incVals.netRecvBytes += recvRes;
|
||||
return recvRes;
|
||||
}
|
||||
|
||||
if(recvRes == 0)
|
||||
throw SocketDisconnectException(std::string("Soft disconnect from ") + peername);
|
||||
else
|
||||
throw SocketDisconnectException(std::string("Recv(): Hard disconnect from ") + peername);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Note: This is the default version, using poll only => see man pages of select(2) bugs section
|
||||
*
|
||||
* @param flags ignored
|
||||
* @throw SocketException
|
||||
*/
|
||||
ssize_t RDMASocketImpl::recvT(void *buf, size_t len, int flags, int timeoutMS)
|
||||
{
|
||||
ssize_t recvRes = IBVSocket_recvT(ibvsock, (char*)buf, len, flags, timeoutMS);
|
||||
if(recvRes > 0)
|
||||
{
|
||||
stats->incVals.netRecvBytes += recvRes;
|
||||
return recvRes;
|
||||
}
|
||||
|
||||
if(recvRes == -ETIMEDOUT)
|
||||
throw SocketTimeoutException("Receive timed out from: " + peername);
|
||||
else
|
||||
throw SocketDisconnectException("Received disconnect from: " + peername);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Note: Don't call this for sockets that have never been connected!
|
||||
*
|
||||
* @throw SocketException
|
||||
*/
|
||||
void RDMASocketImpl::checkConnection()
|
||||
{
|
||||
if(IBVSocket_checkConnection(ibvsock) )
|
||||
throw SocketDisconnectException("Disconnect from: " + peername);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find out whether it is possible to call recv without blocking.
|
||||
* Useful if the fd says there is incoming data (because that might be a false alarm
|
||||
* in case of an RDMASocket).
|
||||
*
|
||||
* @return 0 if no data immediately available, >0 if incoming data is available
|
||||
* @throw SocketException
|
||||
*/
|
||||
ssize_t RDMASocketImpl::nonblockingRecvCheck()
|
||||
{
|
||||
ssize_t checkRes = IBVSocket_nonblockingRecvCheck(ibvsock);
|
||||
if(checkRes < 0)
|
||||
throw SocketDisconnectException("Disconnect from: " + peername);
|
||||
|
||||
return checkRes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Call this after accept() to find out whether more events are waiting (for which
|
||||
* no notification would not be delivered through the file descriptor).
|
||||
*
|
||||
* @return true if more events are waiting and accept() should be called again
|
||||
*/
|
||||
bool RDMASocketImpl::checkDelayedEvents()
|
||||
{
|
||||
return IBVSocket_checkDelayedEvents(ibvsock);
|
||||
}
|
||||
83
common/ib_lib/RDMASocketImpl.h
Normal file
83
common/ib_lib/RDMASocketImpl.h
Normal file
@@ -0,0 +1,83 @@
|
||||
#include <net/sock/ibvsocket/IBVSocket.h>
|
||||
#include <common/net/sock/RDMASocket.h>
|
||||
|
||||
class RDMASocketImpl : public RDMASocket
|
||||
{
|
||||
public:
|
||||
RDMASocketImpl();
|
||||
virtual ~RDMASocketImpl() override;
|
||||
|
||||
virtual void connect(const char* hostname, unsigned short port) override;
|
||||
virtual void connect(const struct sockaddr* serv_addr, socklen_t addrlen) override;
|
||||
virtual void bindToAddr(in_addr_t ipAddr, unsigned short port) override;
|
||||
virtual void listen() override;
|
||||
virtual Socket* accept(struct sockaddr* addr, socklen_t* addrlen) override;
|
||||
virtual void shutdown() override;
|
||||
virtual void shutdownAndRecvDisconnect(int timeoutMS) override;
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
virtual ssize_t write(const void *buf, size_t len, unsigned lkey, const uint64_t rbuf, unsigned rkey) override;
|
||||
virtual ssize_t read(const void *buf, size_t len, unsigned lkey, const uint64_t rbuf, unsigned rkey) override;
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
virtual ssize_t send(const void *buf, size_t len, int flags) override;
|
||||
virtual ssize_t sendto(const void *buf, size_t len, int flags,
|
||||
const struct sockaddr *to, socklen_t tolen) override;
|
||||
|
||||
virtual ssize_t recv(void *buf, size_t len, int flags) override;
|
||||
virtual ssize_t recvT(void *buf, size_t len, int flags, int timeoutMS) override;
|
||||
|
||||
virtual void checkConnection() override;
|
||||
virtual ssize_t nonblockingRecvCheck() override;
|
||||
virtual bool checkDelayedEvents() override;
|
||||
|
||||
private:
|
||||
RDMASocketImpl(IBVSocket* ibvsock, struct in_addr peerIP, std::string peername);
|
||||
|
||||
IBVSocket* ibvsock;
|
||||
int fd; // for pollable interface (will be cm-fd for listening sockets and recv-channel-fd
|
||||
// for connected/accepted sockets)
|
||||
|
||||
IBVCommConfig commCfg;
|
||||
|
||||
public:
|
||||
// getters & setters
|
||||
virtual int getFD() const override
|
||||
{
|
||||
return fd;
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: Only has an effect for unconnected sockets.
|
||||
*/
|
||||
virtual void setBuffers(unsigned bufNum, unsigned bufSize) override
|
||||
{
|
||||
commCfg.bufNum = bufNum;
|
||||
commCfg.bufSize = bufSize;
|
||||
}
|
||||
|
||||
virtual void setTimeouts(int connectMS, int flowSendMS, int pollMS)
|
||||
{
|
||||
IBVSocket_setTimeouts(ibvsock, connectMS, flowSendMS, pollMS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: Only has an effect for unconnected sockets.
|
||||
*/
|
||||
virtual void setTypeOfService(uint8_t typeOfService) override
|
||||
{
|
||||
IBVSocket_setTypeOfService(ibvsock, typeOfService);
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: Only has an effect for testing.
|
||||
*/
|
||||
virtual void setConnectionRejectionRate(unsigned rate) override
|
||||
{
|
||||
IBVSocket_setConnectionRejectionRate(ibvsock, rate);
|
||||
}
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
extern RDMASocket::ImplCallbacks beegfs_socket_impl;
|
||||
}
|
||||
2503
common/ib_lib/net/sock/ibvsocket/IBVSocket.cpp
Normal file
2503
common/ib_lib/net/sock/ibvsocket/IBVSocket.cpp
Normal file
File diff suppressed because it is too large
Load Diff
200
common/ib_lib/net/sock/ibvsocket/IBVSocket.h
Normal file
200
common/ib_lib/net/sock/ibvsocket/IBVSocket.h
Normal file
@@ -0,0 +1,200 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/toolkit/serialization/Serialization.h>
|
||||
#include <common/Common.h>
|
||||
#include <net/sock/ibvsocket/OpenTk_IBVSocket.h>
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/poll.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netdb.h>
|
||||
#include <queue>
|
||||
|
||||
|
||||
|
||||
#include <infiniband/verbs.h>
|
||||
#include <rdma/rdma_cma.h>
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
#include <common/threading/Mutex.h>
|
||||
#include <unordered_map>
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
// work request IDs: consecutive values (1, 2, 3, 4), each one derived from its predecessor
#define IBVSOCKET_RECV_WORK_ID_OFFSET  (1)
#define IBVSOCKET_SEND_WORK_ID_OFFSET  (1 + IBVSOCKET_RECV_WORK_ID_OFFSET)
#define IBVSOCKET_WRITE_WORK_ID        (1 + IBVSOCKET_SEND_WORK_ID_OFFSET)
#define IBVSOCKET_READ_WORK_ID         (1 + IBVSOCKET_WRITE_WORK_ID)

#define IBVSOCKET_EVENTS_GATHER_NUM    (64)

// handshake private data (see IBVCommDest::verificationStr and protocolVersion)
#define IBVSOCKET_PRIVATEDATA_STR            "fhgfs0 " // must be exactly(!!) 8 bytes long
#define IBVSOCKET_PRIVATEDATA_STR_LEN        8
#define IBVSOCKET_PRIVATEDATA_PROTOCOL_VER   1

// Enforce the length requirement at compile time instead of relying on the comment only.
// The 8 bytes are the 7 visible characters plus the terminating zero of the literal.
static_assert(sizeof(IBVSOCKET_PRIVATEDATA_STR) == IBVSOCKET_PRIVATEDATA_STR_LEN,
   "IBVSOCKET_PRIVATEDATA_STR must be exactly IBVSOCKET_PRIVATEDATA_STR_LEN bytes long");
|
||||
|
||||
|
||||
// Forward declarations of the structs defined further below. Each typedef also introduces the
// struct name itself, so no separate "struct X;" line is needed.
typedef struct IBVIncompleteRecv IBVIncompleteRecv;
typedef struct IBVIncompleteSend IBVIncompleteSend;

typedef struct IBVCommContext IBVCommContext;

typedef struct IBVCommDest IBVCommDest;
|
||||
|
||||
// queue of rdma_cm events (used for IBVSocket::delayedCmEventsQ)
using CmEventQueue = std::queue<struct rdma_cm_event*>;
#ifdef BEEGFS_NVFS
// container types used by the NVFS-only members of IBVCommContext
using MRMap = std::unordered_map<char *, struct ibv_mr *>;
using CQMap = std::unordered_map<uint64_t, int>;
#endif /* BEEGFS_NVFS */
|
||||
|
||||
|
||||
extern void __IBVSocket_initFromCommContext(IBVSocket* _this, struct rdma_cm_id* cm_id,
|
||||
IBVCommContext* commContext);
|
||||
extern IBVSocket* __IBVSocket_constructFromCommContext(struct rdma_cm_id* cm_id,
|
||||
IBVCommContext* commContext);
|
||||
|
||||
|
||||
extern int __IBVSocket_registerBuf(IBVCommContext* commContext, void* buf, size_t bufLen,
|
||||
struct ibv_mr **outMR);
|
||||
extern char* __IBVSocket_allocAndRegisterBuf(IBVCommContext* commContext, size_t bufLen,
|
||||
struct ibv_mr **outMR);
|
||||
|
||||
extern bool __IBVSocket_createCommContext(IBVSocket* _this, struct rdma_cm_id* cm_id,
|
||||
IBVCommConfig* commCfg, IBVCommContext** outCommContext);
|
||||
extern void __IBVSocket_cleanupCommContext(struct rdma_cm_id* cm_id, IBVCommContext* commContext);
|
||||
|
||||
extern void __IBVSocket_initCommDest(IBVCommContext* commContext, IBVCommDest* outDest);
|
||||
extern bool __IBVSocket_parseCommDest(const void* buf, size_t bufLen, IBVCommDest** outDest);
|
||||
|
||||
extern int __IBVSocket_postRecv(IBVSocket* _this, IBVCommContext* commContext, size_t bufIndex);
|
||||
extern int __IBVSocket_postWrite(IBVSocket* _this, IBVCommDest* remoteDest,
|
||||
struct ibv_mr* localMR, char* localBuf, int bufLen);
|
||||
extern int __IBVSocket_postRead(IBVSocket* _this, IBVCommDest* remoteDest,
|
||||
struct ibv_mr* localMR, char* localBuf, int bufLen);
|
||||
#ifdef BEEGFS_NVFS
|
||||
extern int __IBVSocket_postWrite(IBVSocket* _this, char* localBuf, int bufLen, unsigned lkey,
|
||||
uint64_t remoteBuf, unsigned rkey);
|
||||
extern int __IBVSocket_postRead(IBVSocket* _this, char* localBuf, int bufLen, unsigned lkey,
|
||||
uint64_t remoteBuf, unsigned rkey);
|
||||
#endif /* BEEGFS_NVFS */
|
||||
extern int __IBVSocket_postSend(IBVSocket* _this, size_t bufIndex, int bufLen);
|
||||
extern int __IBVSocket_recvWC(IBVSocket* _this, int timeoutMS, struct ibv_wc* outWC);
|
||||
|
||||
extern int __IBVSocket_flowControlOnRecv(IBVSocket* _this, int timeoutMS);
|
||||
extern void __IBVSocket_flowControlOnSendUpdateCounters(IBVSocket* _this);
|
||||
extern int __IBVSocket_flowControlOnSendWait(IBVSocket* _this, int timeoutMS);
|
||||
|
||||
extern int __IBVSocket_waitForRecvCompletionEvent(IBVSocket* _this, int timeoutMS,
|
||||
struct ibv_wc* outWC);
|
||||
extern int __IBVSocket_waitForTotalSendCompletion(IBVSocket* _this,
|
||||
int numSendElements, int numWriteElements, int numReadElements);
|
||||
extern int __IBVSocket_waitForUsedSendBufsReset(IBVSocket* _this);
|
||||
|
||||
extern ssize_t __IBVSocket_recvContinueIncomplete(IBVSocket* _this,
|
||||
char* buf, size_t bufLen);
|
||||
|
||||
extern void __IBVSocket_disconnect(IBVSocket* _this);
|
||||
extern void __IBVSocket_close(IBVSocket* _this);
|
||||
|
||||
extern bool __IBVSocket_initEpollFD(IBVSocket* _this);
|
||||
|
||||
extern const char* __IBVSocket_wcStatusStr(int wcStatusCode);
|
||||
|
||||
struct IBVIncompleteRecv
|
||||
{
|
||||
int isAvailable;
|
||||
int completedOffset;
|
||||
struct ibv_wc wc;
|
||||
};
|
||||
|
||||
/**
 * Send-side counterpart of IBVIncompleteRecv (member of IBVCommContext); holds a single
 * counter.
 */
struct IBVIncompleteSend
{
   unsigned numAvailable;
};
|
||||
|
||||
/**
 * Timeout settings of an IBVSocket (see IBVSocket::timeoutCfg). The three values correspond
 * to the parameters of IBVSocket_setTimeouts(); the "MS" suffix indicates milliseconds.
 */
struct IBVTimeoutConfig
{
   int connectMS;
   int flowSendMS;
   int pollMS;
};
|
||||
|
||||
struct IBVCommContext
|
||||
{
|
||||
struct ibv_context* context;
|
||||
struct ibv_pd* pd; // protection domain
|
||||
struct ibv_mr* recvMR; // recvBuf mem region
|
||||
struct ibv_mr* sendMR; // sendBuf mem region
|
||||
struct ibv_mr* controlMR; // flow/flood control mem region
|
||||
struct ibv_mr* controlResetMR; // flow/flood control reset mem region
|
||||
|
||||
struct ibv_comp_channel* recvCompChannel; // recv completion event channel
|
||||
unsigned numUnackedRecvCompChannelEvents; // number of gathered events
|
||||
|
||||
struct ibv_cq* recvCQ; // recv completion queue
|
||||
struct ibv_cq* sendCQ; // send completion queue
|
||||
struct ibv_qp* qp; // send+recv queue pair
|
||||
|
||||
IBVCommConfig commCfg;
|
||||
char* recvBuf; // large alloc'ed and reg'ed buffer for recvBufs
|
||||
char** recvBufs; // points to chunks inside recvBuf
|
||||
char* sendBuf; // large alloc'ed and reg'ed buffer for sendBufs
|
||||
char** sendBufs; // points to chunks inside sendBuf
|
||||
volatile uint64_t numUsedSendBufs; // sender's flow/flood control counter (volatile!!)
|
||||
volatile uint64_t numUsedSendBufsReset; // flow/flood control reset value
|
||||
uint64_t numUsedRecvBufs; // receiver's flow/flood control (reset) counter
|
||||
unsigned numReceivedBufsLeft; // flow control v2 to avoid IB rnr timeout
|
||||
unsigned numSendBufsLeft; // flow control v2 to avoid IB rnr timeout
|
||||
|
||||
IBVIncompleteRecv incompleteRecv;
|
||||
IBVIncompleteSend incompleteSend;
|
||||
#ifdef BEEGFS_NVFS
|
||||
uint64_t wr_id;
|
||||
Mutex *cqMutex;
|
||||
CQMap *cqCompletions;
|
||||
MRMap *workerMRs;
|
||||
#endif /* BEEGFS_NVFS */
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
// Note: Make sure this struct has the same size on all architectures (because we use
|
||||
// sizeof(IBVCommDest) for private_data during handshake)
|
||||
struct IBVCommDest
|
||||
{
|
||||
char verificationStr[IBVSOCKET_PRIVATEDATA_STR_LEN];
|
||||
uint64_t protocolVersion;
|
||||
uint64_t vaddr;
|
||||
unsigned rkey;
|
||||
unsigned recvBufNum;
|
||||
unsigned recvBufSize;
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
struct IBVSocket
|
||||
{
|
||||
struct rdma_event_channel* cm_channel;
|
||||
struct rdma_cm_id* cm_id;
|
||||
|
||||
IBVCommDest localDest;
|
||||
IBVCommDest* remoteDest;
|
||||
|
||||
IBVCommContext* commContext;
|
||||
int epollFD; // only for connected sockets, invalid (-1) for listeners
|
||||
|
||||
bool sockValid;
|
||||
int errState;
|
||||
|
||||
CmEventQueue* delayedCmEventsQ;
|
||||
|
||||
uint8_t typeOfService;
|
||||
|
||||
unsigned connectionRejectionRate;
|
||||
unsigned connectionRejectionCount;
|
||||
|
||||
IBVTimeoutConfig timeoutCfg;
|
||||
struct in_addr bindIP;
|
||||
};
|
||||
|
||||
|
||||
82
common/ib_lib/net/sock/ibvsocket/OpenTk_IBVSocket.h
Normal file
82
common/ib_lib/net/sock/ibvsocket/OpenTk_IBVSocket.h
Normal file
@@ -0,0 +1,82 @@
|
||||
#pragma once
|
||||
|
||||
#include <arpa/inet.h>
|
||||
|
||||
|
||||
/*
|
||||
* This is the interface of the ibverbs socket abstraction.
|
||||
*/
|
||||
|
||||
|
||||
// Opaque types of this interface. Each typedef also declares the struct name itself.
typedef struct IBVSocket IBVSocket;

typedef struct IBVCommConfig IBVCommConfig;
|
||||
|
||||
|
||||
/**
 * Result codes of IBVSocket_accept().
 */
enum IBVSocket_AcceptRes
{
   ACCEPTRES_ERR     = 0, // accept failed
   ACCEPTRES_IGNORE  = 1, // nothing was accepted, but this is not an error
   ACCEPTRES_SUCCESS = 2
};
typedef enum IBVSocket_AcceptRes IBVSocket_AcceptRes;
|
||||
|
||||
|
||||
// construction/destruction
|
||||
extern void IBVSocket_init(IBVSocket* _this);
|
||||
extern IBVSocket* IBVSocket_construct();
|
||||
extern void IBVSocket_uninit(IBVSocket* _this);
|
||||
extern void IBVSocket_destruct(IBVSocket* _this);
|
||||
|
||||
// static
|
||||
extern bool IBVSocket_rdmaDevicesExist(void);
|
||||
extern void IBVSocket_fork_init_once(void);
|
||||
|
||||
// methods
|
||||
extern bool IBVSocket_connectByName(IBVSocket* _this, const char* hostname, unsigned short port,
|
||||
IBVCommConfig* commCfg);
|
||||
extern bool IBVSocket_connectByIP(IBVSocket* _this, struct in_addr ipaddress, unsigned short port,
|
||||
IBVCommConfig* commCfg);
|
||||
extern bool IBVSocket_bind(IBVSocket* _this, unsigned short port);
|
||||
extern bool IBVSocket_bindToAddr(IBVSocket* _this, in_addr_t ipAddr, unsigned short port);
|
||||
extern bool IBVSocket_listen(IBVSocket* _this);
|
||||
extern IBVSocket_AcceptRes IBVSocket_accept(IBVSocket* _this, IBVSocket** outAcceptedSock,
|
||||
struct sockaddr* peerAddr, socklen_t* peerAddrLen);
|
||||
extern bool IBVSocket_shutdown(IBVSocket* _this);
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
extern ssize_t IBVSocket_write(IBVSocket* _this, const char* buf, size_t bufLen, unsigned lkey,
|
||||
const uint64_t rbuf, unsigned rkey);
|
||||
extern ssize_t IBVSocket_read(IBVSocket* _this, const char* buf, size_t bufLen, unsigned lkey,
|
||||
const uint64_t rbuf, unsigned rkey);
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
extern ssize_t IBVSocket_recv(IBVSocket* _this, char* buf, size_t bufLen, int flags);
|
||||
extern ssize_t IBVSocket_recvT(IBVSocket* _this, char* buf, size_t bufLen, int flags,
|
||||
int timeoutMS);
|
||||
extern ssize_t IBVSocket_send(IBVSocket* _this, const char* buf, size_t bufLen, int flags);
|
||||
|
||||
extern int IBVSocket_checkConnection(IBVSocket* _this);
|
||||
extern ssize_t IBVSocket_nonblockingRecvCheck(IBVSocket* _this);
|
||||
extern bool IBVSocket_checkDelayedEvents(IBVSocket* _this);
|
||||
|
||||
|
||||
// getters & setters
|
||||
extern bool IBVSocket_getSockValid(IBVSocket* _this);
|
||||
extern int IBVSocket_getRecvCompletionFD(IBVSocket* _this);
|
||||
extern int IBVSocket_getConnManagerFD(IBVSocket* _this);
|
||||
extern void IBVSocket_setTypeOfService(IBVSocket* _this, uint8_t typeOfService);
|
||||
extern void IBVSocket_setTimeouts(IBVSocket* _this, int connectMS, int flowSendMS,
|
||||
int pollMS);
|
||||
|
||||
// testing methods
|
||||
extern void IBVSocket_setConnectionRejectionRate(IBVSocket* _this, unsigned rate);
|
||||
extern bool IBVSocket_connectionRejection(IBVSocket* _this);
|
||||
|
||||
|
||||
/**
 * Communication settings that are handed to the IBVSocket connect functions.
 */
struct IBVCommConfig
{
   unsigned bufNum;       // number of available buffers
   unsigned bufSize;      // size of each buffer
   uint8_t  serviceLevel;
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user