343 lines
9.2 KiB
C++
343 lines
9.2 KiB
C++
#include <common/app/AbstractApp.h>
|
|
#include <common/system/System.h>
|
|
#include <common/threading/PThread.h>
|
|
#include <common/toolkit/StringTk.h>
|
|
#include "RDMASocketImpl.h"
|
|
|
|
#include <utility>
|
|
|
|
|
|
/**
 * Factory function used as the "create socket" entry of the implementation callback table.
 *
 * @return a new RDMASocketImpl, allocated with new and returned as a raw pointer (it is not
 *    released here)
 */
static RDMASocket* new_rdma_socket()
{
   RDMASocket* newSock = new RDMASocketImpl();

   return newSock;
}
|
|
|
|
// Callback table of this RDMA socket implementation: device check, fork init and socket factory.
RDMASocket::ImplCallbacks beegfs_socket_impl =
{
   IBVSocket_rdmaDevicesExist,
   IBVSocket_fork_init_once,
   new_rdma_socket,
};
|
|
|
|
|
|
// Note: Good tradeoff between throughput and mem usage (for SDR IB):
//    buf_num=64; buf_size=4*1024 (=> 512kB per socket for send and recv)
// Be aware that the default below is buf_num=128, i.e. twice the buffer count of that tradeoff.

#define RDMASOCKET_DEFAULT_BUF_NUM  (128) // moved to config
#define RDMASOCKET_DEFAULT_BUF_SIZE (4*1024) // moved to config
#define RDMASOCKET_DEFAULT_SL       (0) // no trailing ';' here, so it can be used in expressions
|
|
|
|
|
|
/**
|
|
* Note: Did you notice the rdmaForkInitOnce() method?
|
|
*
|
|
* @throw SocketException
|
|
*/
|
|
RDMASocketImpl::RDMASocketImpl()
|
|
{
|
|
this->sockType = NICADDRTYPE_RDMA;
|
|
|
|
commCfg.bufNum = RDMASOCKET_DEFAULT_BUF_NUM;
|
|
commCfg.bufSize = RDMASOCKET_DEFAULT_BUF_SIZE;
|
|
commCfg.serviceLevel = RDMASOCKET_DEFAULT_SL;
|
|
|
|
this->ibvsock = IBVSocket_construct();
|
|
|
|
if(!ibvsock)
|
|
throw SocketException("RDMASocket allocation failed. SysErr: " + System::getErrString() );
|
|
|
|
if(!IBVSocket_getSockValid(this->ibvsock) )
|
|
{
|
|
IBVSocket_destruct(this->ibvsock);
|
|
throw SocketException("RDMASocket initialization failed. SysErr: " + System::getErrString() );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Note: To be used by accept() only.
|
|
*
|
|
* @param sock will be closed/destructed by the destructor of this object
|
|
*/
|
|
RDMASocketImpl::RDMASocketImpl(IBVSocket* ibvsock, struct in_addr peerIP, std::string peername)
|
|
{
|
|
this->ibvsock = ibvsock;
|
|
this->fd = IBVSocket_getRecvCompletionFD(ibvsock);
|
|
|
|
this->peerIP = peerIP;
|
|
this->peername = std::move(peername);
|
|
|
|
this->sockType = NICADDRTYPE_RDMA;
|
|
}
|
|
|
|
|
|
/**
 * Destructs the underlying IBVSocket, if there is one.
 */
RDMASocketImpl::~RDMASocketImpl()
{
   if(!ibvsock)
      return; // nothing to release

   IBVSocket_destruct(ibvsock);
}
|
|
|
|
/**
|
|
* @throw SocketException
|
|
*/
|
|
void RDMASocketImpl::connect(const char* hostname, unsigned short port)
|
|
{
|
|
Socket::connect(hostname, port, AF_UNSPEC, SOCK_STREAM);
|
|
}
|
|
|
|
/**
|
|
* @throw SocketException
|
|
*/
|
|
void RDMASocketImpl::connect(const struct sockaddr* serv_addr, socklen_t addrlen)
|
|
{
|
|
unsigned short peerPort = ntohs( ( (struct sockaddr_in*)serv_addr)->sin_port );
|
|
|
|
this->peerIP = ( (struct sockaddr_in*)serv_addr)->sin_addr;
|
|
|
|
// set peername if not done so already (e.g. by connect(hostname) )
|
|
|
|
if(peername.empty() )
|
|
peername = Socket::endpointAddrToStr(peerIP, peerPort);
|
|
|
|
bool connRes = IBVSocket_connectByIP(ibvsock, peerIP, peerPort, &commCfg);
|
|
if(!connRes)
|
|
throw SocketConnectException(
|
|
std::string("RDMASocket unable to connect to: ") + std::string(peername) );
|
|
|
|
|
|
this->fd = IBVSocket_getRecvCompletionFD(ibvsock);
|
|
}
|
|
|
|
/**
|
|
* @throw SocketException
|
|
*/
|
|
void RDMASocketImpl::bindToAddr(in_addr_t ipAddr, unsigned short port)
|
|
{
|
|
bool bindRes = IBVSocket_bindToAddr(ibvsock, ipAddr, port);
|
|
if(!bindRes)
|
|
throw SocketException("RDMASocket unable to bind to port: " +
|
|
StringTk::uintToStr(port) );
|
|
this->bindIP.s_addr = ipAddr;
|
|
this->bindPort = port;
|
|
}
|
|
|
|
/**
|
|
* @throw SocketException
|
|
*/
|
|
void RDMASocketImpl::listen()
|
|
{
|
|
bool listenRes = IBVSocket_listen(ibvsock);
|
|
if(!listenRes)
|
|
throw SocketException(std::string("RDMASocket unable to listen.") );
|
|
|
|
this->fd = IBVSocket_getConnManagerFD(ibvsock);
|
|
peername = std::string("Listen(Port: ") + StringTk::uintToStr(bindPort) + std::string(")");
|
|
}
|
|
|
|
/**
|
|
* @return might return NULL in case an ignored event occurred; consider it to be a kind of false
|
|
* alert (=> this is not an error)
|
|
* @throw SocketException
|
|
*/
|
|
Socket* RDMASocketImpl::accept(struct sockaddr *addr, socklen_t *addrlen)
|
|
{
|
|
IBVSocket* acceptedIBVSocket = NULL;
|
|
|
|
IBVSocket_AcceptRes acceptRes = IBVSocket_accept(ibvsock, &acceptedIBVSocket, addr, addrlen);
|
|
if(acceptRes == ACCEPTRES_IGNORE)
|
|
return NULL;
|
|
else
|
|
if(acceptRes == ACCEPTRES_ERR)
|
|
throw SocketException(std::string("RDMASocket unable to accept.") );
|
|
|
|
// prepare new socket object
|
|
struct in_addr acceptIP = ( (struct sockaddr_in*)addr)->sin_addr;
|
|
unsigned short acceptPort = ntohs( ( (struct sockaddr_in*)addr)->sin_port);
|
|
|
|
std::string acceptPeername = endpointAddrToStr(acceptIP, acceptPort);
|
|
|
|
Socket* acceptedSock = new RDMASocketImpl(acceptedIBVSocket, acceptIP, acceptPeername);
|
|
|
|
return acceptedSock;
|
|
}
|
|
|
|
/**
|
|
* @throw SocketException
|
|
*/
|
|
void RDMASocketImpl::shutdown()
|
|
{
|
|
bool shutRes = IBVSocket_shutdown(ibvsock);
|
|
if(!shutRes)
|
|
throw SocketException(std::string("RDMASocket shutdown failed.") );
|
|
}
|
|
|
|
/**
|
|
* Note: The RecvDisconnect-part is currently not implemented, so this is equal to the
|
|
* normal shutdown() method.
|
|
*
|
|
* @throw SocketException
|
|
*/
|
|
void RDMASocketImpl::shutdownAndRecvDisconnect(int timeoutMS)
|
|
{
|
|
this->shutdown();
|
|
}
|
|
|
|
#ifdef BEEGFS_NVFS
|
|
/**
|
|
* Note: This is a synchronous (blocking) version
|
|
*
|
|
* @throw SocketException
|
|
*/
|
|
ssize_t RDMASocketImpl::read(const void *buf, size_t len, unsigned lkey, const uint64_t rbuf, unsigned rkey)
|
|
{
|
|
size_t status = IBVSocket_read(this->ibvsock, (char *)buf, len, lkey, rbuf, rkey);
|
|
return (status == 0) ? len : -1;
|
|
}
|
|
|
|
/**
|
|
* Note: This is a synchronous (blocking) version
|
|
*
|
|
* @throw SocketException
|
|
*/
|
|
ssize_t RDMASocketImpl::write(const void *buf, size_t len, unsigned lkey, const uint64_t rbuf, unsigned rkey)
|
|
{
|
|
size_t status = IBVSocket_write(this->ibvsock, (char *)buf, len, lkey, rbuf, rkey);
|
|
return (status == 0) ? len : -1;
|
|
}
|
|
#endif /* BEEGFS_NVFS */
|
|
|
|
/**
|
|
* Note: This is a synchronous (blocking) version
|
|
*
|
|
* @param flags ignored
|
|
* @throw SocketException
|
|
*/
|
|
ssize_t RDMASocketImpl::send(const void *buf, size_t len, int flags)
|
|
{
|
|
ssize_t sendRes = IBVSocket_send(ibvsock, (const char*)buf, len, flags | MSG_NOSIGNAL);
|
|
if(sendRes == (ssize_t)len)
|
|
{
|
|
stats->incVals.netSendBytes += len;
|
|
return sendRes;
|
|
}
|
|
else
|
|
if(sendRes > 0)
|
|
{
|
|
throw SocketException(
|
|
std::string("send(): Sent only ") + StringTk::int64ToStr(sendRes) +
|
|
std::string(" bytes of the requested ") + StringTk::int64ToStr(len) +
|
|
std::string(" bytes of data") );
|
|
}
|
|
|
|
throw SocketDisconnectException(
|
|
"Disconnect during send() to: " + peername);
|
|
}
|
|
|
|
|
|
/**
|
|
* Note: This is a connection-based socket type, so to and tolen are ignored.
|
|
*
|
|
* @param flags ignored
|
|
* @throw SocketException
|
|
*/
|
|
ssize_t RDMASocketImpl::sendto(const void *buf, size_t len, int flags,
|
|
const struct sockaddr *to, socklen_t tolen)
|
|
{
|
|
ssize_t sendRes = IBVSocket_send(ibvsock, (const char*)buf, len, flags | MSG_NOSIGNAL);
|
|
if(sendRes == (ssize_t)len)
|
|
{
|
|
stats->incVals.netSendBytes += len;
|
|
return sendRes;
|
|
}
|
|
else
|
|
if(sendRes > 0)
|
|
{
|
|
throw SocketException(
|
|
std::string("send(): Sent only ") + StringTk::int64ToStr(sendRes) +
|
|
std::string(" bytes of the requested ") + StringTk::int64ToStr(len) +
|
|
std::string(" bytes of data") );
|
|
}
|
|
|
|
throw SocketDisconnectException(
|
|
std::string("Disconnect during send() to: ") + peername);
|
|
}
|
|
|
|
/**
|
|
* @param flags ignored
|
|
* @throw SocketException
|
|
*/
|
|
ssize_t RDMASocketImpl::recv(void *buf, size_t len, int flags)
|
|
{
|
|
ssize_t recvRes = IBVSocket_recv(ibvsock, (char*)buf, len, flags);
|
|
if(recvRes > 0)
|
|
{
|
|
stats->incVals.netRecvBytes += recvRes;
|
|
return recvRes;
|
|
}
|
|
|
|
if(recvRes == 0)
|
|
throw SocketDisconnectException(std::string("Soft disconnect from ") + peername);
|
|
else
|
|
throw SocketDisconnectException(std::string("Recv(): Hard disconnect from ") + peername);
|
|
}
|
|
|
|
|
|
/**
|
|
* Note: This is the default version, using poll only => see man pages of select(2) bugs section
|
|
*
|
|
* @param flags ignored
|
|
* @throw SocketException
|
|
*/
|
|
ssize_t RDMASocketImpl::recvT(void *buf, size_t len, int flags, int timeoutMS)
|
|
{
|
|
ssize_t recvRes = IBVSocket_recvT(ibvsock, (char*)buf, len, flags, timeoutMS);
|
|
if(recvRes > 0)
|
|
{
|
|
stats->incVals.netRecvBytes += recvRes;
|
|
return recvRes;
|
|
}
|
|
|
|
if(recvRes == -ETIMEDOUT)
|
|
throw SocketTimeoutException("Receive timed out from: " + peername);
|
|
else
|
|
throw SocketDisconnectException("Received disconnect from: " + peername);
|
|
}
|
|
|
|
|
|
/**
|
|
* Note: Don't call this for sockets that have never been connected!
|
|
*
|
|
* @throw SocketException
|
|
*/
|
|
void RDMASocketImpl::checkConnection()
|
|
{
|
|
if(IBVSocket_checkConnection(ibvsock) )
|
|
throw SocketDisconnectException("Disconnect from: " + peername);
|
|
}
|
|
|
|
/**
|
|
* Find out whether it is possible to call recv without blocking.
|
|
* Useful if the fd says there is incoming data (because that might be a false alarm
|
|
* in case of an RDMASocket).
|
|
*
|
|
* @return 0 if no data immediately available, >0 if incoming data is available
|
|
* @throw SocketException
|
|
*/
|
|
ssize_t RDMASocketImpl::nonblockingRecvCheck()
|
|
{
|
|
ssize_t checkRes = IBVSocket_nonblockingRecvCheck(ibvsock);
|
|
if(checkRes < 0)
|
|
throw SocketDisconnectException("Disconnect from: " + peername);
|
|
|
|
return checkRes;
|
|
}
|
|
|
|
/**
|
|
* Call this after accept() to find out whether more events are waiting (for which
|
|
* no notification would not be delivered through the file descriptor).
|
|
*
|
|
* @return true if more events are waiting and accept() should be called again
|
|
*/
|
|
bool RDMASocketImpl::checkDelayedEvents()
|
|
{
|
|
return IBVSocket_checkDelayedEvents(ibvsock);
|
|
}
|