New upstream version 8.1.0
This commit is contained in:
261
client_module/source/common/net/sock/NetworkInterfaceCard.c
Normal file
261
client_module/source/common/net/sock/NetworkInterfaceCard.c
Normal file
@@ -0,0 +1,261 @@
|
||||
#include <common/net/sock/RDMASocket.h>
|
||||
#include <common/net/sock/StandardSocket.h>
|
||||
#include <common/net/sock/NetworkInterfaceCard.h>
|
||||
#include <common/toolkit/ListTk.h>
|
||||
|
||||
#include <linux/if_arp.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/inetdevice.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
|
||||
#define NIC_STRING_LEN 1024
|
||||
|
||||
|
||||
static bool __NIC_fillNicAddress(struct net_device* dev, NicAddrType_t nicType,
|
||||
NicAddress* outAddr);
|
||||
|
||||
|
||||
void NIC_findAll(StrCpyList* allowedInterfaces, bool useRDMA, bool onlyRDMA,
|
||||
NicAddressList* outList)
|
||||
{
|
||||
// find standard TCP/IP interfaces
|
||||
__NIC_findAllTCP(allowedInterfaces, outList);
|
||||
|
||||
// find RDMA interfaces (based on TCP/IP interfaces query results)
|
||||
if(useRDMA && RDMASocket_rdmaDevicesExist() )
|
||||
{
|
||||
NicAddressList tcpInterfaces;
|
||||
|
||||
NicAddressList_init(&tcpInterfaces);
|
||||
|
||||
__NIC_findAllTCP(allowedInterfaces, &tcpInterfaces);
|
||||
|
||||
__NIC_filterInterfacesForRDMA(&tcpInterfaces, outList);
|
||||
|
||||
ListTk_kfreeNicAddressListElems(&tcpInterfaces);
|
||||
NicAddressList_uninit(&tcpInterfaces);
|
||||
}
|
||||
|
||||
if (onlyRDMA)
|
||||
{
|
||||
NicAddressListIter nicIter;
|
||||
NicAddressListIter_init(&nicIter, outList);
|
||||
while (!NicAddressListIter_end(&nicIter))
|
||||
{
|
||||
NicAddress* nicAddr = NicAddressListIter_value(&nicIter);
|
||||
if (nicAddr->nicType != NICADDRTYPE_RDMA)
|
||||
{
|
||||
nicIter = NicAddressListIter_remove(&nicIter);
|
||||
kfree(nicAddr);
|
||||
}
|
||||
else
|
||||
NicAddressListIter_next(&nicIter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Walks all net devices of init_net and appends a kmalloc'ed NicAddress to outList for each
 * device that __NIC_fillNicAddress accepts and that passes the name filter.
 *
 * The walk stops early (after logging a warning) if a NicAddress cannot be allocated.
 *
 * @param allowedInterfaces if non-empty, only devices whose name is contained in this list are
 *    added; an empty list accepts every device
 * @param outList receives the accepted entries
 */
void __NIC_findAllTCP(StrCpyList* allowedInterfaces, NicAddressList* outList)
{
   struct net_device* netDev = first_net_device(&init_net);

   while(netDev)
   {
      ssize_t listPos = 0; // out-param of ListTk_listContains; value is not used here
      bool accepted;
      NicAddress* candidate = (NicAddress*)os_kmalloc(sizeof(NicAddress) );

      if(!candidate)
      {
         printk_fhgfs(KERN_WARNING, "%s:%d: memory allocation failed. size: %zu\n",
            __func__, __LINE__, sizeof(*candidate) );
         return;
      }

      accepted = __NIC_fillNicAddress(netDev, NICADDRTYPE_STANDARD, candidate) &&
         (!StrCpyList_length(allowedInterfaces) ||
            ListTk_listContains(candidate->name, allowedInterfaces, &listPos) );

      if(accepted)
         NicAddressList_append(outList, candidate);
      else
         kfree(candidate); // netdevice rejected => clean up

      netDev = next_net_device(netDev);
   }
}
|
||||
|
||||
/**
 * Fills outAddr with the name, the given type and the first IPv4 address of a net device.
 *
 * Note: outAddr->name (and outAddr->ibdev when BEEGFS_RDMA is defined) are written even when
 * the device is rejected; the remaining fields are only valid if true is returned.
 *
 * NOTE(review): __in_dev_get_rtnl() and the ifa_list access presumably require the caller to
 * hold the RTNL lock - confirm against the callers.
 *
 * @param dev device to inspect
 * @param nicType type tag that is stored in outAddr
 * @param outAddr result buffer
 * @return false for loopback devices and for devices without in_device or without any IPv4
 *    address, true otherwise
 */
bool __NIC_fillNicAddress(struct net_device* dev, NicAddrType_t nicType, NicAddress* outAddr)
{
   struct in_device* in_dev;
   struct in_ifaddr* ifa;

#ifdef BEEGFS_RDMA
   outAddr->ibdev = NULL;
#endif

   // name (strncpy does not terminate on truncation, so terminate explicitly)
   strncpy(outAddr->name, dev->name, IFNAMSIZ);
   outAddr->name[IFNAMSIZ - 1] = '\0';

   // interface flags: skip loopback interfaces
   if(dev_get_flags(dev) & IFF_LOOPBACK)
      return false;

   // hardware type: skip loopback hardware
   // (on Linux see /usr/include/linux/if_arp.h for the whole list of types)
   if(dev->type == ARPHRD_LOOPBACK)
      return false;

   outAddr->nicType = nicType;

   // ip address
   // note: based on inet_gifconf in /net/ipv4/devinet.c
   in_dev = __in_dev_get_rtnl(dev);
   if(!in_dev)
   {
      printk_fhgfs_debug(KERN_NOTICE, "found interface without in_dev: %s\n", dev->name);
      return false;
   }

   ifa = in_dev->ifa_list;
   if(!ifa)
   {
      printk_fhgfs_debug(KERN_NOTICE, "found interface without ifa_list: %s\n", dev->name);
      return false;
   }

   // only the first address of the device is used; further entries (ifa->ifa_next) are ignored
   outAddr->ipAddr.s_addr = ifa->ifa_local;

   return true;
}
|
||||
|
||||
/**
|
||||
* @return static string (not alloc'ed, so don't free it).
|
||||
*/
|
||||
const char* NIC_nicTypeToString(NicAddrType_t nicType)
|
||||
{
|
||||
switch(nicType)
|
||||
{
|
||||
case NICADDRTYPE_RDMA: return "RDMA";
|
||||
case NICADDRTYPE_STANDARD: return "TCP";
|
||||
|
||||
default: return "<unknown>";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string will be kalloced and must be kfreed later
|
||||
*/
|
||||
char* NIC_nicAddrToString(NicAddress* nicAddr)
|
||||
{
|
||||
char* nicAddrStr;
|
||||
char ipStr[NICADDRESS_IP_STR_LEN];
|
||||
const char* typeStr;
|
||||
|
||||
nicAddrStr = (char*)os_kmalloc(NIC_STRING_LEN);
|
||||
|
||||
NicAddress_ipToStr(nicAddr->ipAddr, ipStr);
|
||||
|
||||
if(nicAddr->nicType == NICADDRTYPE_RDMA)
|
||||
typeStr = "RDMA";
|
||||
else
|
||||
if(nicAddr->nicType == NICADDRTYPE_STANDARD)
|
||||
typeStr = "TCP";
|
||||
else
|
||||
typeStr = "Unknown";
|
||||
|
||||
snprintf(nicAddrStr, NIC_STRING_LEN, "%s[ip addr: %s; type: %s]", nicAddr->name, ipStr, typeStr);
|
||||
|
||||
return nicAddrStr;
|
||||
}
|
||||
|
||||
/**
 * Checks whether a NIC list contains at least one RDMA entry.
 *
 * @return true if any element of nicList has nicType NICADDRTYPE_RDMA, false otherwise
 *    (including for an empty list).
 */
bool NIC_supportsRDMA(NicAddressList* nicList)
{
   NicAddressListIter cursor;

   NicAddressListIter_init(&cursor, nicList);

   while(!NicAddressListIter_end(&cursor) )
   {
      if(NicAddressListIter_value(&cursor)->nicType == NICADDRTYPE_RDMA)
         return true; // first hit is sufficient

      NicAddressListIter_next(&cursor);
   }

   return false;
}
|
||||
|
||||
/**
 * Derives the capability flags of a NIC list.
 *
 * @param nicList list to inspect
 * @param outCapabilities receives the result; supportsRDMA is set to NIC_supportsRDMA(nicList)
 */
void NIC_supportedCapabilities(NicAddressList* nicList, NicListCapabilities* outCapabilities)
{
   const bool hasRDMA = NIC_supportsRDMA(nicList);

   outCapabilities->supportsRDMA = hasRDMA;
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks a list of TCP/IP interfaces for RDMA-capable interfaces.
|
||||
*/
|
||||
void __NIC_filterInterfacesForRDMA(NicAddressList* nicList, NicAddressList* outList)
|
||||
{
|
||||
// Note: This works by binding an RDMASocket to each IP of the passed list.
|
||||
|
||||
NicAddressListIter iter;
|
||||
NicAddressListIter_init(&iter, nicList);
|
||||
|
||||
for( ; !NicAddressListIter_end(&iter); NicAddressListIter_next(&iter) )
|
||||
{
|
||||
RDMASocket rdmaSock;
|
||||
Socket* sock = (Socket*)&rdmaSock;
|
||||
NicAddress* nicAddr = NicAddressListIter_value(&iter);
|
||||
bool bindRes;
|
||||
|
||||
if(!RDMASocket_init(&rdmaSock, nicAddr->ipAddr, NULL) )
|
||||
continue;
|
||||
|
||||
bindRes = sock->ops->bindToAddr(sock, nicAddr->ipAddr, 0);
|
||||
|
||||
if(bindRes)
|
||||
{ // we've got an RDMA-capable interface => append it to outList
|
||||
NicAddress* nicAddrCopy = os_kmalloc(sizeof(NicAddress) );
|
||||
|
||||
*nicAddrCopy = *nicAddr;
|
||||
|
||||
#ifdef BEEGFS_RDMA
|
||||
nicAddrCopy->ibdev = rdmaSock.ibvsock.cm_id->device;
|
||||
#endif
|
||||
nicAddrCopy->nicType = NICADDRTYPE_RDMA;
|
||||
|
||||
NicAddressList_append(outList, nicAddrCopy);
|
||||
}
|
||||
|
||||
sock->ops->uninit(sock);
|
||||
}
|
||||
}
|
||||
25
client_module/source/common/net/sock/NetworkInterfaceCard.h
Normal file
25
client_module/source/common/net/sock/NetworkInterfaceCard.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef NETWORKINTERFACECARD_H_
|
||||
#define NETWORKINTERFACECARD_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include <common/net/sock/NicAddress.h>
|
||||
#include <common/net/sock/NicAddressList.h>
|
||||
#include <common/net/sock/NicAddressListIter.h>
|
||||
|
||||
|
||||
extern void NIC_findAll(StrCpyList* allowedInterfaces, bool useRDMA, bool onlyRDMA,
|
||||
NicAddressList* outList);
|
||||
|
||||
extern const char* NIC_nicTypeToString(NicAddrType_t nicType);
|
||||
extern char* NIC_nicAddrToString(NicAddress* nicAddr);
|
||||
|
||||
extern bool NIC_supportsRDMA(NicAddressList* nicList);
|
||||
extern void NIC_supportedCapabilities(NicAddressList* nicList,
|
||||
NicListCapabilities* outCapabilities);
|
||||
|
||||
extern void __NIC_findAllTCP(StrCpyList* allowedInterfaces, NicAddressList* outList);
|
||||
extern void __NIC_filterInterfacesForRDMA(NicAddressList* list, NicAddressList* outList);
|
||||
|
||||
|
||||
#endif /*NETWORKINTERFACECARD_H_*/
|
||||
34
client_module/source/common/net/sock/NicAddress.c
Normal file
34
client_module/source/common/net/sock/NicAddress.c
Normal file
@@ -0,0 +1,34 @@
|
||||
#include <common/net/sock/NicAddress.h>
|
||||
#include <common/toolkit/Serialization.h>
|
||||
|
||||
/**
|
||||
* @return true if lhs (left-hand side) is preferred compared to rhs
|
||||
*/
|
||||
bool NicAddress_preferenceComp(const NicAddress* lhs, const NicAddress* rhs)
|
||||
{
|
||||
// compares the preference of NICs
|
||||
// returns true if lhs is preferred compared to rhs
|
||||
|
||||
unsigned lhsHostOrderIP;
|
||||
unsigned rhsHostOrderIP;
|
||||
|
||||
// prefer RDMA NICs
|
||||
if( (lhs->nicType == NICADDRTYPE_RDMA) && (rhs->nicType != NICADDRTYPE_RDMA) )
|
||||
return true;
|
||||
if( (rhs->nicType == NICADDRTYPE_RDMA) && (lhs->nicType != NICADDRTYPE_RDMA) )
|
||||
return false;
|
||||
|
||||
// no bandwidth in client NicAddress
|
||||
// // prefer higher bandwidth
|
||||
// if(lhs->bandwidth > rhs->bandwidth)
|
||||
// return true;
|
||||
// if(lhs->bandwidth < rhs->bandwidth)
|
||||
// return false;
|
||||
|
||||
// prefer higher ipAddr
|
||||
lhsHostOrderIP = ntohl(lhs->ipAddr.s_addr);
|
||||
rhsHostOrderIP = ntohl(rhs->ipAddr.s_addr);
|
||||
|
||||
// this is the original IP-order version
|
||||
return lhsHostOrderIP > rhsHostOrderIP;
|
||||
}
|
||||
70
client_module/source/common/net/sock/NicAddress.h
Normal file
70
client_module/source/common/net/sock/NicAddress.h
Normal file
@@ -0,0 +1,70 @@
|
||||
#ifndef NICADDRESS_H_
|
||||
#define NICADDRESS_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <linux/if.h>
|
||||
|
||||
#define NICADDRESS_IP_STR_LEN 16
|
||||
|
||||
|
||||
enum NicAddrType;
|
||||
typedef enum NicAddrType NicAddrType_t;
|
||||
|
||||
struct NicAddress;
|
||||
typedef struct NicAddress NicAddress;
|
||||
|
||||
struct NicListCapabilities;
|
||||
typedef struct NicListCapabilities NicListCapabilities;
|
||||
|
||||
struct ib_device;
|
||||
|
||||
extern bool NicAddress_preferenceComp(const NicAddress* lhs, const NicAddress* rhs);
|
||||
|
||||
// inliners
|
||||
static inline void NicAddress_ipToStr(struct in_addr ipAddr, char* outStr);
|
||||
static inline bool NicAddress_equals(NicAddress* this, NicAddress* other);
|
||||
|
||||
|
||||
/**
 * Kind of a network interface address. The numeric values are fixed explicitly; value 1 was
 * formerly used by NICADDRTYPE_SDP, which has been removed.
 */
enum NicAddrType
{
   NICADDRTYPE_STANDARD = 0,
   /* 1: removed (NICADDRTYPE_SDP) */
   NICADDRTYPE_RDMA = 2
};
|
||||
|
||||
struct NicAddress
|
||||
{
|
||||
struct in_addr ipAddr;
|
||||
NicAddrType_t nicType;
|
||||
char name[IFNAMSIZ];
|
||||
#ifdef BEEGFS_RDMA
|
||||
struct ib_device *ibdev;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct NicListCapabilities
|
||||
{
|
||||
bool supportsRDMA;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @param outStr must be at least NICADDRESS_STR_LEN bytes long
|
||||
*/
|
||||
void NicAddress_ipToStr(struct in_addr ipAddr, char* outStr)
|
||||
{
|
||||
u8* ipArray = (u8*)&ipAddr.s_addr;
|
||||
|
||||
sprintf(outStr, "%u.%u.%u.%u", ipArray[0], ipArray[1], ipArray[2], ipArray[3]);
|
||||
}
|
||||
|
||||
/**
 * @return true if both addresses have the same IP, the same NIC type and the same interface
 *    name (names are compared over at most IFNAMSIZ characters).
 */
bool NicAddress_equals(NicAddress* this, NicAddress* other)
{
   if(this->ipAddr.s_addr != other->ipAddr.s_addr)
      return false;

   if(this->nicType != other->nicType)
      return false;

   return strncmp(this->name, other->name, IFNAMSIZ) == 0;
}
|
||||
|
||||
#endif /*NICADDRESS_H_*/
|
||||
35
client_module/source/common/net/sock/NicAddressList.c
Normal file
35
client_module/source/common/net/sock/NicAddressList.c
Normal file
@@ -0,0 +1,35 @@
|
||||
#include <common/net/sock/RDMASocket.h>
|
||||
#include <common/net/sock/StandardSocket.h>
|
||||
#include <common/net/sock/NetworkInterfaceCard.h>
|
||||
#include <common/net/sock/NicAddressList.h>
|
||||
|
||||
#if 0
|
||||
#include <linux/if_arp.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/inetdevice.h>
|
||||
#include <net/sock.h>
|
||||
#endif
|
||||
|
||||
/**
 * Element-wise, order-sensitive comparison of two NIC lists.
 *
 * @return true if both lists have the same length and NicAddress_equals holds for every pair
 *    of elements at the same position.
 */
bool NicAddressList_equals(NicAddressList* this, NicAddressList* other)
{
   PointerListIter lhsIter;
   PointerListIter rhsIter;

   if(NicAddressList_length(this) != NicAddressList_length(other) )
      return false;

   PointerListIter_init(&lhsIter, (PointerList*)this);
   PointerListIter_init(&rhsIter, (PointerList*)other);

   while(!PointerListIter_end(&lhsIter) && !PointerListIter_end(&rhsIter) )
   {
      NicAddress* lhsAddr = (NicAddress*)PointerListIter_value(&lhsIter);
      NicAddress* rhsAddr = (NicAddress*)PointerListIter_value(&rhsIter);

      if(!NicAddress_equals(lhsAddr, rhsAddr) )
         return false; // first mismatch decides

      PointerListIter_next(&lhsIter);
      PointerListIter_next(&rhsIter);
   }

   return true;
}
|
||||
45
client_module/source/common/net/sock/NicAddressList.h
Normal file
45
client_module/source/common/net/sock/NicAddressList.h
Normal file
@@ -0,0 +1,45 @@
|
||||
#ifndef NICADDRESSLIST_H_
|
||||
#define NICADDRESSLIST_H_
|
||||
|
||||
#include <common/toolkit/list/PointerList.h>
|
||||
#include <common/toolkit/list/PointerListIter.h>
|
||||
#include <common/Common.h>
|
||||
#include "NicAddress.h"
|
||||
|
||||
struct NicAddressList;
|
||||
typedef struct NicAddressList NicAddressList;
|
||||
|
||||
static inline void NicAddressList_init(NicAddressList* this);
|
||||
static inline void NicAddressList_uninit(NicAddressList* this);
|
||||
static inline void NicAddressList_append(NicAddressList* this, NicAddress* nicAddress);
|
||||
static inline size_t NicAddressList_length(NicAddressList* this);
|
||||
|
||||
extern bool NicAddressList_equals(NicAddressList* this, NicAddressList* other);
|
||||
|
||||
struct NicAddressList
|
||||
{
|
||||
struct PointerList pointerList;
|
||||
};
|
||||
|
||||
|
||||
// Initializes the underlying PointerList.
void NicAddressList_init(NicAddressList* this)
{
   PointerList_init(&this->pointerList);
}
|
||||
|
||||
// Uninitializes the underlying PointerList.
void NicAddressList_uninit(NicAddressList* this)
{
   PointerList_uninit(&this->pointerList);
}
|
||||
|
||||
// Appends the given NicAddress pointer to the underlying PointerList.
void NicAddressList_append(NicAddressList* this, NicAddress* nicAddress)
{
   PointerList_append(&this->pointerList, nicAddress);
}
|
||||
|
||||
// @return the length reported by the underlying PointerList
size_t NicAddressList_length(NicAddressList* this)
{
   return PointerList_length(&this->pointerList);
}
|
||||
|
||||
#endif /*NICADDRESSLIST_H_*/
|
||||
59
client_module/source/common/net/sock/NicAddressListIter.h
Normal file
59
client_module/source/common/net/sock/NicAddressListIter.h
Normal file
@@ -0,0 +1,59 @@
|
||||
#ifndef NICADDRESSLISTITER_H_
|
||||
#define NICADDRESSLISTITER_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include "NicAddressList.h"
|
||||
|
||||
struct NicAddressListIter;
|
||||
typedef struct NicAddressListIter NicAddressListIter;
|
||||
|
||||
static inline void NicAddressListIter_init(NicAddressListIter* this, NicAddressList* list);
|
||||
static inline void NicAddressListIter_next(NicAddressListIter* this);
|
||||
static inline NicAddress* NicAddressListIter_value(NicAddressListIter* this);
|
||||
static inline bool NicAddressListIter_end(NicAddressListIter* this);
|
||||
static inline NicAddressListIter NicAddressListIter_remove(NicAddressListIter* this);
|
||||
|
||||
|
||||
struct NicAddressListIter
|
||||
{
|
||||
PointerListIter pointerListIter;
|
||||
};
|
||||
|
||||
|
||||
// Initializes the iterator for the given list via PointerListIter_init.
void NicAddressListIter_init(NicAddressListIter* this, NicAddressList* list)
{
   PointerListIter_init(&this->pointerListIter, &list->pointerList);
}
|
||||
|
||||
// Advances the iterator by one element.
void NicAddressListIter_next(NicAddressListIter* this)
{
   PointerListIter_next(&this->pointerListIter);
}
|
||||
|
||||
// @return the value pointer of the current element, cast to NicAddress*
NicAddress* NicAddressListIter_value(NicAddressListIter* this)
{
   void* rawValue = PointerListIter_value(&this->pointerListIter);

   return (NicAddress*)rawValue;
}
|
||||
|
||||
// @return the end-of-iteration result of the underlying PointerListIter_end
bool NicAddressListIter_end(NicAddressListIter* this)
{
   return PointerListIter_end(&this->pointerListIter);
}
|
||||
|
||||
/**
|
||||
* note: the current iterator becomes invalid after the call (use the returned iterator)
|
||||
* @return the new iterator that points to the element just behind the erased one
|
||||
*/
|
||||
NicAddressListIter NicAddressListIter_remove(NicAddressListIter* this)
|
||||
{
|
||||
NicAddressListIter newIter = *this;
|
||||
|
||||
NicAddressListIter_next(&newIter); // the new iter that will be returned
|
||||
|
||||
PointerListIter_remove( (PointerListIter*)this);
|
||||
|
||||
return newIter;
|
||||
}
|
||||
|
||||
|
||||
#endif /*NICADDRESSLISTITER_H_*/
|
||||
137
client_module/source/common/net/sock/NicAddressStats.h
Normal file
137
client_module/source/common/net/sock/NicAddressStats.h
Normal file
@@ -0,0 +1,137 @@
|
||||
#ifndef NICADDRESSSTATS_H_
|
||||
#define NICADDRESSSTATS_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <common/toolkit/Time.h>
|
||||
#ifdef BEEGFS_RDMA
|
||||
#include <rdma/ib_verbs.h>
|
||||
#endif
|
||||
|
||||
struct NicAddressStats;
|
||||
typedef struct NicAddressStats NicAddressStats;
|
||||
|
||||
static inline void NicAddressStats_init(NicAddressStats* this, NicAddress* nic);
|
||||
static inline void NicAddressStats_uninit(NicAddressStats* this);
|
||||
/**
|
||||
* Called when an associated NIC has gone down. This indicates
|
||||
* that this particular statistic should not be considered for load balancing.
|
||||
*/
|
||||
static inline void NicAddressStats_invalidate(NicAddressStats* this);
|
||||
/**
|
||||
* Called when an associated NIC has come online. This updates the internal NicAddress
|
||||
* and indicates that this particular statistic should be considered for load balancing.
|
||||
*/
|
||||
static inline void NicAddressStats_setValid(NicAddressStats* this, NicAddress* nic);
|
||||
static inline int NicAddressStats_comparePriority(NicAddressStats* this, NicAddressStats* o,
|
||||
int numa);
|
||||
static inline void NicAddressStats_updateUsed(NicAddressStats* this);
|
||||
static inline void NicAddressStats_updateLastError(NicAddressStats* this);
|
||||
static inline bool NicAddressStats_lastErrorExpired(NicAddressStats* this, Time* now,
|
||||
int expirationSecs);
|
||||
static inline bool NicAddressStats_usable(NicAddressStats* this, int maxConns);
|
||||
|
||||
struct NicAddressStats
|
||||
{
|
||||
NicAddress nic;
|
||||
int established;
|
||||
int available;
|
||||
Time used;
|
||||
Time lastError;
|
||||
/**
|
||||
* nicValid indicates if the NicAddress can be used for connections.
|
||||
* This may be tracking stats for a device that has gone offline.
|
||||
*/
|
||||
bool nicValid;
|
||||
};
|
||||
|
||||
/**
 * Initializes the stats for the given NIC: stores a copy of *nic, zeroes both counters and
 * both timestamps and marks the NIC as valid.
 */
void NicAddressStats_init(NicAddressStats* this, NicAddress* nic)
{
   this->nic = *nic;
   this->nicValid = true;

   this->established = 0;
   this->available = 0;

   Time_initZero(&this->used);
   Time_initZero(&this->lastError);
}
|
||||
|
||||
// Counterpart of NicAddressStats_init; currently there is nothing to release.
void NicAddressStats_uninit(NicAddressStats* this)
{
   /* intentionally empty */
}
|
||||
|
||||
// Marks the NIC as not usable and (with BEEGFS_RDMA) drops the IB device reference.
void NicAddressStats_invalidate(NicAddressStats* this)
{
#ifdef BEEGFS_RDMA
   this->nic.ibdev = NULL;
#endif
   this->nicValid = false;
}
|
||||
|
||||
// Replaces the stored NIC with a copy of *nic and marks it as usable again.
void NicAddressStats_setValid(NicAddressStats* this, NicAddress* nic)
{
   this->nic = *nic;
   this->nicValid = true;
}
|
||||
|
||||
/*
|
||||
* Compare the priority of this and o.
|
||||
*
|
||||
* Return value is < 0 if this has higher priority, > 0 if o has higher priority.
|
||||
*/
|
||||
int NicAddressStats_comparePriority(NicAddressStats* this, NicAddressStats* o,
|
||||
int numa)
|
||||
{
|
||||
int rc;
|
||||
|
||||
#ifdef BEEGFS_RDMA
|
||||
// device on the same numa node as current thread has higher priority
|
||||
if (likely(this->nic.ibdev && o->nic.ibdev))
|
||||
{
|
||||
int thisNode = this->nic.ibdev->dma_device->numa_node;
|
||||
int oNode = o->nic.ibdev->dma_device->numa_node;
|
||||
if (thisNode != oNode)
|
||||
{
|
||||
if (thisNode == numa)
|
||||
return -1;
|
||||
if (oNode == numa)
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// device with more available connections has higher priority
|
||||
rc = o->available - this->available;
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
// device with less established connections has higher priority
|
||||
rc = this->established - o->established;
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
// device used less recently has higher priority
|
||||
return Time_compare(&this->used, &o->used);
|
||||
}
|
||||
|
||||
// Stamps the "used" time with the current time.
void NicAddressStats_updateUsed(NicAddressStats* this)
{
   Time* usedStamp = &this->used;

   Time_setToNow(usedStamp);
}
|
||||
|
||||
// Stamps the "lastError" time with the current time.
void NicAddressStats_updateLastError(NicAddressStats* this)
{
   Time* errorStamp = &this->lastError;

   Time_setToNow(errorStamp);
}
|
||||
|
||||
/**
 * @param now reference time
 * @param expirationSecs expiration interval in seconds
 * @return true if at least expirationSecs seconds (compared in milliseconds) lie between
 *    lastError and now according to Time_elapsedSinceMS.
 */
bool NicAddressStats_lastErrorExpired(NicAddressStats* this, Time* now, int expirationSecs)
{
   const int expirationMS = expirationSecs * 1000;

   return Time_elapsedSinceMS(now, &this->lastError) >= expirationMS;
}
|
||||
|
||||
/**
 * @param maxConns upper bound for established connections
 * @return true if there is an available connection or room for another established one;
 *    with BEEGFS_RDMA always false when the NIC has no IB device.
 */
bool NicAddressStats_usable(NicAddressStats* this, int maxConns)
{
#ifdef BEEGFS_RDMA
   if (unlikely(!this->nic.ibdev))
      return false;
#endif

   if (this->available > 0)
      return true;

   return this->established < maxConns;
}
|
||||
|
||||
#endif /*NICADDRESSSTATS_H_*/
|
||||
42
client_module/source/common/net/sock/NicAddressStatsList.h
Normal file
42
client_module/source/common/net/sock/NicAddressStatsList.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef NICADDRESSSTATSLIST_H_
|
||||
#define NICADDRESSSTATSLIST_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <common/toolkit/list/PointerList.h>
|
||||
#include "NicAddressStats.h"
|
||||
|
||||
struct NicAddressStatsList;
|
||||
typedef struct NicAddressStatsList NicAddressStatsList;
|
||||
|
||||
static inline void NicAddressStatsList_init(NicAddressStatsList* this);
|
||||
static inline void NicAddressStatsList_uninit(NicAddressStatsList* this);
|
||||
static inline void NicAddressStatsList_append(NicAddressStatsList* this, NicAddressStats* stats);
|
||||
static inline size_t NicAddressStatsList_length(NicAddressStatsList* this);
|
||||
|
||||
struct NicAddressStatsList
|
||||
{
|
||||
PointerList pointerList;
|
||||
};
|
||||
|
||||
// Initializes the underlying PointerList.
void NicAddressStatsList_init(NicAddressStatsList* this)
{
   PointerList_init(&this->pointerList);
}
|
||||
|
||||
// Uninitializes the underlying PointerList.
void NicAddressStatsList_uninit(NicAddressStatsList* this)
{
   PointerList_uninit(&this->pointerList);
}
|
||||
|
||||
// Appends the given NicAddressStats pointer to the underlying PointerList.
void NicAddressStatsList_append(NicAddressStatsList* this, struct NicAddressStats* stats)
{
   PointerList_append(&this->pointerList, stats);
}
|
||||
|
||||
// @return the length reported by the underlying PointerList
static inline size_t NicAddressStatsList_length(NicAddressStatsList* this)
{
   return PointerList_length(&this->pointerList);
}
|
||||
|
||||
|
||||
#endif /*NICADDRESSSTATSLIST_H_*/
|
||||
@@ -0,0 +1,56 @@
|
||||
#ifndef NICADDRESSSTATSLISTITER_H_
|
||||
#define NICADDRESSSTATSLISTITER_H_
|
||||
|
||||
#include <common/toolkit/list/PointerListIter.h>
|
||||
#include "NicAddressStatsList.h"
|
||||
|
||||
struct NicAddressStatsListIter;
|
||||
typedef struct NicAddressStatsListIter NicAddressStatsListIter;
|
||||
|
||||
static inline void NicAddressStatsListIter_init(NicAddressStatsListIter* this, NicAddressStatsList* list);
|
||||
static inline void NicAddressStatsListIter_next(NicAddressStatsListIter* this);
|
||||
static inline NicAddressStats* NicAddressStatsListIter_value(NicAddressStatsListIter* this);
|
||||
static inline bool NicAddressStatsListIter_end(NicAddressStatsListIter* this);
|
||||
static inline NicAddressStatsListIter NicAddressStatsListIter_remove(NicAddressStatsListIter* this);
|
||||
|
||||
struct NicAddressStatsListIter
|
||||
{
|
||||
PointerListIter pointerListIter;
|
||||
};
|
||||
|
||||
// Initializes the iterator for the given list via PointerListIter_init.
void NicAddressStatsListIter_init(NicAddressStatsListIter* this, NicAddressStatsList* list)
{
   PointerListIter_init(&this->pointerListIter, &list->pointerList);
}
|
||||
|
||||
// Advances the iterator by one element.
void NicAddressStatsListIter_next(NicAddressStatsListIter* this)
{
   PointerListIter_next(&this->pointerListIter);
}
|
||||
|
||||
// @return the value pointer of the current element, cast to NicAddressStats*
NicAddressStats* NicAddressStatsListIter_value(NicAddressStatsListIter* this)
{
   void* rawValue = PointerListIter_value(&this->pointerListIter);

   return (struct NicAddressStats*)rawValue;
}
|
||||
|
||||
// @return the end-of-iteration result of the underlying PointerListIter_end
bool NicAddressStatsListIter_end(NicAddressStatsListIter* this)
{
   return PointerListIter_end(&this->pointerListIter);
}
|
||||
|
||||
/**
|
||||
* note: the current iterator becomes invalid after the call (use the returned iterator)
|
||||
* @return the new iterator that points to the element just behind the erased one
|
||||
*/
|
||||
NicAddressStatsListIter NicAddressStatsListIter_remove(NicAddressStatsListIter* this)
|
||||
{
|
||||
NicAddressStatsListIter newIter = *this;
|
||||
|
||||
NicAddressStatsListIter_next(&newIter); // the new iter that will be returned
|
||||
|
||||
PointerListIter_remove( (PointerListIter*)this);
|
||||
|
||||
return newIter;
|
||||
}
|
||||
|
||||
#endif /*NICADDRESSSTATSLISTITER_H_*/
|
||||
143
client_module/source/common/net/sock/PooledSocket.h
Normal file
143
client_module/source/common/net/sock/PooledSocket.h
Normal file
@@ -0,0 +1,143 @@
|
||||
#ifndef POOLEDSOCKET_H_
|
||||
#define POOLEDSOCKET_H_
|
||||
|
||||
#include <common/net/sock/Socket.h>
|
||||
#include <common/toolkit/Time.h>
|
||||
|
||||
|
||||
struct PooledSocket;
|
||||
typedef struct PooledSocket PooledSocket;
|
||||
struct ConnectionList;
|
||||
typedef struct ConnectionList ConnectionList;
|
||||
|
||||
|
||||
static inline void _PooledSocket_init(PooledSocket* this, NicAddrType_t nicType);
|
||||
static inline void _PooledSocket_uninit(Socket* this);
|
||||
|
||||
// inliners
|
||||
static inline bool PooledSocket_getHasExpired(PooledSocket* this, unsigned expireSecs);
|
||||
|
||||
// getters & setters
|
||||
static inline bool PooledSocket_isAvailable(PooledSocket* this);
|
||||
static inline void PooledSocket_setAvailable(PooledSocket* this, bool available);
|
||||
static inline bool PooledSocket_getHasActivity(PooledSocket* this);
|
||||
static inline void PooledSocket_setHasActivity(PooledSocket* this);
|
||||
static inline void PooledSocket_resetHasActivity(PooledSocket* this);
|
||||
static inline bool PooledSocket_getHasExpirationTimer(PooledSocket* this);
|
||||
static inline void PooledSocket_setExpireTimeStart(PooledSocket* this);
|
||||
static inline NicAddrType_t PooledSocket_getNicType(PooledSocket* this);
|
||||
static inline ConnectionList* PooledSocket_getPool(PooledSocket* this);
|
||||
static inline PointerListElem* PooledSocket_getPoolElem(PooledSocket* this);
|
||||
static inline void PooledSocket_setPool(PooledSocket* this, ConnectionList* pool,
|
||||
PointerListElem* poolElem);
|
||||
|
||||
|
||||
/**
|
||||
* This class provides special extensions for sockets in a NodeConnPool.
|
||||
*/
|
||||
struct PooledSocket
|
||||
{
|
||||
Socket socket;
|
||||
ConnectionList* pool;
|
||||
PointerListElem* poolElem;
|
||||
bool available; // == !acquired
|
||||
bool hasActivity; // true if channel was not idle (part of channel class in fhgfs_common)
|
||||
bool closeOnRelease; /* release must close socket. used for signal handling */
|
||||
Time expireTimeStart; // 0 means "doesn't expire", otherwise time when conn was established
|
||||
NicAddrType_t nicType; // same as the interface for which this conn was established
|
||||
};
|
||||
|
||||
|
||||
/**
 * Initializes the Socket base and the pooling extensions: not available, marked active, no
 * close-on-release, no expiration timer and not attached to any pool.
 *
 * @param nicType type of the interface this connection is established for
 */
void _PooledSocket_init(PooledSocket* this, NicAddrType_t nicType)
{
   // base class first
   _Socket_init(&this->socket);

   this->pool = NULL;
   this->poolElem = NULL;

   this->available = false;
   this->hasActivity = true; // initially active to avoid immediate disconnection
   this->closeOnRelease = false;

   Time_initZero(&this->expireTimeStart);
   this->nicType = nicType;
}
|
||||
|
||||
// Uninitializes the Socket base; the pooling extensions need no cleanup of their own.
void _PooledSocket_uninit(Socket* this)
{
   Socket* base = this;

   _Socket_uninit(base);
}
|
||||
|
||||
/**
|
||||
* Tests whether this socket is set to expire and whether its expire time has been exceeded.
|
||||
*
|
||||
* @param expireSecs the time in seconds after which an expire-enabled socket expires.
|
||||
* @return true if this socket has expired.
|
||||
*/
|
||||
bool PooledSocket_getHasExpired(PooledSocket* this, unsigned expireSecs)
|
||||
{
|
||||
if(likely(Time_getIsZero(&this->expireTimeStart) ) )
|
||||
return false;
|
||||
|
||||
if(Time_elapsedMS(&this->expireTimeStart) > (expireSecs*1000) ) // "*1000" for milliseconds
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// @return the "available" flag (== !acquired)
bool PooledSocket_isAvailable(PooledSocket* this)
{
   const bool isAvailable = this->available;

   return isAvailable;
}
|
||||
|
||||
// Sets the "available" flag (== !acquired).
void PooledSocket_setAvailable(PooledSocket* this, bool available)
{
   this->available = available;
}
|
||||
|
||||
// @return the "hasActivity" flag (true if the channel was not idle)
bool PooledSocket_getHasActivity(PooledSocket* this)
{
   const bool wasActive = this->hasActivity;

   return wasActive;
}
|
||||
|
||||
// Marks the socket as active (sets "hasActivity" to true).
void PooledSocket_setHasActivity(PooledSocket* this)
{
   this->hasActivity = true;
}
|
||||
|
||||
// Clears the activity marker (sets "hasActivity" to false).
void PooledSocket_resetHasActivity(PooledSocket* this)
{
   this->hasActivity = false;
}
|
||||
|
||||
// @return true if an expiration start time has been set (i.e. expireTimeStart is not zero)
bool PooledSocket_getHasExpirationTimer(PooledSocket* this)
{
   if(Time_getIsZero(&this->expireTimeStart) )
      return false;

   return true;
}
|
||||
|
||||
// Starts the expiration timer by setting expireTimeStart to the current time.
void PooledSocket_setExpireTimeStart(PooledSocket* this)
{
   Time* startStamp = &this->expireTimeStart;

   Time_setToNow(startStamp);
}
|
||||
|
||||
// @return the NIC type that was passed to _PooledSocket_init
NicAddrType_t PooledSocket_getNicType(PooledSocket* this)
{
   const NicAddrType_t type = this->nicType;

   return type;
}
|
||||
|
||||
void PooledSocket_setPool(PooledSocket* this, ConnectionList* pool,
|
||||
PointerListElem* poolElem)
|
||||
{
|
||||
this->pool = pool;
|
||||
this->poolElem = poolElem;
|
||||
}
|
||||
|
||||
// @return the pool stored by PooledSocket_setPool (NULL after init)
ConnectionList* PooledSocket_getPool(PooledSocket* this)
{
   ConnectionList* owner = this->pool;

   return owner;
}
|
||||
|
||||
// @return the pool element stored by PooledSocket_setPool (NULL after init)
PointerListElem* PooledSocket_getPoolElem(PooledSocket* this)
{
   PointerListElem* elem = this->poolElem;

   return elem;
}
|
||||
|
||||
#endif /*POOLEDSOCKET_H_*/
|
||||
233
client_module/source/common/net/sock/RDMASocket.c
Normal file
233
client_module/source/common/net/sock/RDMASocket.c
Normal file
@@ -0,0 +1,233 @@
|
||||
#include <common/net/sock/RDMASocket.h>
|
||||
#include <common/Common.h>
|
||||
|
||||
#include <linux/in.h>
|
||||
#include <linux/poll.h>
|
||||
|
||||
|
||||
// Note: These are historical defaults designed for SDR IB and do not provide
|
||||
// the best performance for current IB fabrics. Ideally, buf_size should be
|
||||
// configured as the largest chunksize used by the filesystem and buf_num
|
||||
// will be 3. It would be ideal to take buf_num down to 1, but the current
|
||||
// protocol requires at least 3 buffers.
|
||||
// buf_num=64; buf_size=4*1024 (=> 512kB per socket for send and recv)
|
||||
|
||||
#define RDMASOCKET_DEFAULT_BUF_NUM (128) // moved to config
|
||||
#define RDMASOCKET_DEFAULT_BUF_SIZE (4*1024) // moved to config
|
||||
#define RDMASOCKET_DEFAULT_FRAGMENT_SIZE RDMASOCKET_DEFAULT_BUF_SIZE // moved to config
|
||||
#define RDMASOCKET_DEFAULT_KEY_TYPE RDMAKEYTYPE_UnsafeGlobal
|
||||
|
||||
static const struct SocketOps rdmaOps = {
|
||||
.uninit = _RDMASocket_uninit,
|
||||
|
||||
.connectByIP = _RDMASocket_connectByIP,
|
||||
.bindToAddr = _RDMASocket_bindToAddr,
|
||||
.listen = _RDMASocket_listen,
|
||||
.shutdown = _RDMASocket_shutdown,
|
||||
.shutdownAndRecvDisconnect = _RDMASocket_shutdownAndRecvDisconnect,
|
||||
|
||||
.sendto = _RDMASocket_sendto,
|
||||
.recvT = _RDMASocket_recvT,
|
||||
};
|
||||
|
||||
/**
 * Initialize an RDMASocket in place: base class first, then the default comm config, then
 * the embedded IBVSocket.
 *
 * @param src passed through to IBVSocket_init()
 * @param nicStats passed through to IBVSocket_init()
 * @return false if IBVSocket_init() failed (the base class is uninitialized again in that
 *    case), true otherwise
 */
bool RDMASocket_init(RDMASocket* this, struct in_addr src, NicAddressStats* nicStats)
{
   Socket* thisBase = (Socket*)this;

   // init super class
   _PooledSocket_init( (PooledSocket*)this, NICADDRTYPE_RDMA);

   // normal init part
   thisBase->ops = &rdmaOps;
   thisBase->sockType = NICADDRTYPE_RDMA;

   this->commCfg.bufNum = RDMASOCKET_DEFAULT_BUF_NUM;
   this->commCfg.bufSize = RDMASOCKET_DEFAULT_BUF_SIZE;
   this->commCfg.fragmentSize = RDMASOCKET_DEFAULT_FRAGMENT_SIZE;
   this->commCfg.keyType = RDMASocket_toIBVSocketKeyType(RDMASOCKET_DEFAULT_KEY_TYPE);

   if(IBVSocket_init(&this->ibvsock, src, nicStats) )
      return true;

   // IBV init failed => roll back the super class init
   _PooledSocket_uninit(&this->pooledSocket.socket);

   return false;
}
|
||||
|
||||
/**
 * Allocate (GFP_NOFS) and initialize a new RDMASocket.
 *
 * @return NULL if the allocation or RDMASocket_init() failed
 */
RDMASocket* RDMASocket_construct(struct in_addr src, NicAddressStats *nicStats)
{
   RDMASocket* newSock = kmalloc(sizeof(*newSock), GFP_NOFS);

   if(!newSock)
      return NULL;

   if(!RDMASocket_init(newSock, src, nicStats) )
   {
      kfree(newSock);
      return NULL;
   }

   return newSock;
}
|
||||
|
||||
/**
 * Uninit counterpart of RDMASocket_init(): tears down the embedded IBVSocket first and
 * the PooledSocket base afterwards. Does not free the object itself.
 */
void _RDMASocket_uninit(Socket* this)
{
   RDMASocket* rdmaSock = (RDMASocket*)this;

   IBVSocket_uninit(&rdmaSock->ibvsock);

   _PooledSocket_uninit(this);
}
|
||||
|
||||
bool RDMASocket_rdmaDevicesExist(void)
|
||||
{
|
||||
#ifdef BEEGFS_RDMA
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Connect the underlying IBVSocket to the given address, using this socket's commCfg.
 *
 * Note: does not set the family type to the one of this socket.
 *
 * @return true if connected; false otherwise (failure is intentionally not logged)
 */
bool _RDMASocket_connectByIP(Socket* this, struct in_addr ipaddress, unsigned short port)
{
   RDMASocket* rdmaSock = (RDMASocket*)this;

   if(!IBVSocket_connectByIP(&rdmaSock->ibvsock, ipaddress, port, &rdmaSock->commCfg) )
   {
      /* note: no log message here on purpose; it would flood the log if hosts are
         unreachable on the primary interface */
      return false;
   }

   // connected

   // set peername if not done so already (e.g. by connect(hostname) )
   if(this->peername[0] == 0)
   {
      SocketTk_endpointAddrToStrNoAlloc(this->peername, SOCKET_PEERNAME_LEN, ipaddress, port);
      this->peerIP = ipaddress;
   }

   return true;
}
|
||||
|
||||
/**
 * Bind the underlying IBVSocket and remember the port in boundPort on success.
 *
 * @return false if IBVSocket_bindToAddr() failed (boundPort is left untouched then)
 */
bool _RDMASocket_bindToAddr(Socket* this, struct in_addr ipaddress, unsigned short port)
{
   RDMASocket* rdmaSock = (RDMASocket*)this;

   if(!IBVSocket_bindToAddr(&rdmaSock->ibvsock, ipaddress, port) )
      return false; // intentionally silent

   this->boundPort = port;

   return true;
}
|
||||
|
||||
/**
 * Switch the underlying IBVSocket to listening mode and set peername to a
 * "Listen(Port: ...)" label.
 *
 * @return false if IBVSocket_listen() failed (a warning is logged), true otherwise
 */
bool _RDMASocket_listen(Socket* this)
{
   RDMASocket* thisCast = (RDMASocket*)this;

   bool listenRes;

   listenRes = IBVSocket_listen(&thisCast->ibvsock);
   if(!listenRes)
   {
      printk_fhgfs(KERN_WARNING, "Failed to set RDMASocket to listening mode.\n");
      return false;
   }

   /* boundPort is a (signed) int that starts out as -1, so it has to be printed with %d:
      %u mismatches the argument type and would render an unbound socket as 4294967295,
      which does not even fit into SOCKET_PEERNAME_LEN. */
   snprintf(this->peername, SOCKET_PEERNAME_LEN, "Listen(Port: %d)", this->boundPort);

   return true;
}
|
||||
|
||||
/**
 * Shut down the underlying IBVSocket.
 *
 * @return false if IBVSocket_shutdown() failed (logged at debug level), true otherwise
 */
bool _RDMASocket_shutdown(Socket* this)
{
   RDMASocket* rdmaSock = (RDMASocket*)this;

   if(IBVSocket_shutdown(&rdmaSock->ibvsock) )
      return true;

   printk_fhgfs_debug(KERN_INFO, "RDMASocket failed to send shutdown.\n");

   return false;
}
|
||||
|
||||
/**
|
||||
* Note: The RecvDisconnect-part is currently not implemented, so this is equal to the
|
||||
* normal shutdown() method.
|
||||
*/
|
||||
bool _RDMASocket_shutdownAndRecvDisconnect(Socket* this, int timeoutMS)
|
||||
{
|
||||
return this->ops->shutdown(this);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return -ETIMEDOUT on timeout
|
||||
*/
|
||||
ssize_t _RDMASocket_recvT(Socket* this, struct iov_iter* iter, int flags, int timeoutMS)
|
||||
{
|
||||
RDMASocket* thisCast = (RDMASocket*)this;
|
||||
|
||||
ssize_t retVal;
|
||||
|
||||
retVal = IBVSocket_recvT(&thisCast->ibvsock, iter, flags, timeoutMS);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: This is a connection-based socket type, so to and tolen are ignored.
|
||||
*
|
||||
* @param flags ignored
|
||||
*/
|
||||
ssize_t _RDMASocket_sendto(Socket* this, struct iov_iter* iter, int flags,
|
||||
fhgfs_sockaddr_in *to)
|
||||
{
|
||||
RDMASocket* thisCast = (RDMASocket*)this;
|
||||
|
||||
ssize_t retVal;
|
||||
|
||||
retVal = IBVSocket_send(&thisCast->ibvsock, iter, flags);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register for polling (=> this method does not call schedule() !).
|
||||
*
|
||||
* Note: Call this only once with finishPoll==true (=> non-blocking) or multiple times with
|
||||
* finishPoll==true in the last call from the current thread (for cleanup).
|
||||
* Note: It's safe to call this multiple times with finishPoll==true.
|
||||
*
|
||||
* @param events the event flags you are interested in (POLL...)
|
||||
* @param finishPoll true for cleanup if you don't call poll again from this thread; (it's also ok
|
||||
* to set this to true if you call poll only once and want to avoid blocking)
|
||||
* @return mask revents mask (like poll() => POLL... flags), but only the events you requested or
|
||||
* error events
|
||||
*/
|
||||
unsigned long RDMASocket_poll(RDMASocket* this, short events, bool finishPoll)
{
   // pure forwarder: all poll logic lives in the IBVSocket
   unsigned long revents = IBVSocket_poll(&this->ibvsock, events, finishPoll);

   return revents;
}
|
||||
|
||||
133
client_module/source/common/net/sock/RDMASocket.h
Normal file
133
client_module/source/common/net/sock/RDMASocket.h
Normal file
@@ -0,0 +1,133 @@
|
||||
#ifndef OPEN_RDMASOCKET_H_
|
||||
#define OPEN_RDMASOCKET_H_
|
||||
|
||||
#include <common/toolkit/SocketTk.h>
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include <common/toolkit/Time.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/net/sock/ibv/IBVSocket.h>
|
||||
#include <common/net/sock/PooledSocket.h>
|
||||
#include <common/net/sock/NicAddressStats.h>
|
||||
#include <app/config/Config.h>
|
||||
|
||||
struct ib_device;
|
||||
struct ib_mr;
|
||||
struct RDMASocket;
|
||||
typedef struct RDMASocket RDMASocket;
|
||||
struct NicAddressStats;
|
||||
typedef struct NicAddressStats NicAddressStats;
|
||||
|
||||
|
||||
extern __must_check bool RDMASocket_init(RDMASocket* this, struct in_addr srcIpAddr, NicAddressStats* nicStats);
|
||||
extern RDMASocket* RDMASocket_construct(struct in_addr srcIpAddr, NicAddressStats* nicStats);
|
||||
extern void _RDMASocket_uninit(Socket* this);
|
||||
|
||||
extern bool RDMASocket_rdmaDevicesExist(void);
|
||||
|
||||
extern bool _RDMASocket_connectByIP(Socket* this, struct in_addr ipaddress,
|
||||
unsigned short port);
|
||||
extern bool _RDMASocket_bindToAddr(Socket* this, struct in_addr ipaddress,
|
||||
unsigned short port);
|
||||
extern bool _RDMASocket_listen(Socket* this);
|
||||
extern bool _RDMASocket_shutdown(Socket* this);
|
||||
extern bool _RDMASocket_shutdownAndRecvDisconnect(Socket* this, int timeoutMS);
|
||||
|
||||
extern ssize_t _RDMASocket_recvT(Socket* this, struct iov_iter* iter, int flags,
|
||||
int timeoutMS);
|
||||
extern ssize_t _RDMASocket_sendto(Socket* this, struct iov_iter* iter, int flags,
|
||||
fhgfs_sockaddr_in *to);
|
||||
|
||||
extern unsigned long RDMASocket_poll(RDMASocket* this, short events, bool finishPoll);
|
||||
|
||||
// inliners
|
||||
static inline struct ib_device* RDMASocket_getDevice(RDMASocket* this);
|
||||
static inline unsigned RDMASocket_getRkey(RDMASocket* this);
|
||||
static inline bool RDMASocket_isRkeyGlobal(RDMASocket* this);
|
||||
|
||||
static inline void RDMASocket_setBuffers(RDMASocket* this, unsigned bufNum, unsigned bufSize,
|
||||
unsigned fragmentSize, RDMAKeyType keyType);
|
||||
static inline void RDMASocket_setTimeouts(RDMASocket* this, int connectMS,
|
||||
int completionMS, int flowSendMS, int flowRecvMS, int pollMS);
|
||||
static inline void RDMASocket_setTypeOfService(RDMASocket* this, int typeOfService);
|
||||
static inline void RDMASocket_setConnectionFailureStatus(RDMASocket* this, unsigned value);
|
||||
static inline bool RDMASocket_registerMr(RDMASocket* this, struct ib_mr* mr, int access);
|
||||
static inline IBVSocketKeyType RDMASocket_toIBVSocketKeyType(RDMAKeyType keyType);
|
||||
|
||||
struct RDMASocket
|
||||
{
|
||||
PooledSocket pooledSocket;
|
||||
|
||||
IBVSocket ibvsock;
|
||||
|
||||
IBVCommConfig commCfg;
|
||||
};
|
||||
|
||||
/**
 * @return the rkey as reported by the underlying IBVSocket
 */
unsigned RDMASocket_getRkey(RDMASocket *this)
{
   IBVSocket* ibvSock = &this->ibvsock;

   return IBVSocket_getRkey(ibvSock);
}
|
||||
|
||||
/**
 * @return false if the configured key type is IBVSOCKETKEYTYPE_Register, true for every
 *    other key type
 */
bool RDMASocket_isRkeyGlobal(RDMASocket* this)
{
   const bool isRegisterKey = (this->commCfg.keyType == IBVSOCKETKEYTYPE_Register);

   return !isRegisterKey;
}
|
||||
|
||||
/**
 * @return the ib_device as reported by the underlying IBVSocket
 */
struct ib_device* RDMASocket_getDevice(RDMASocket *this)
{
   IBVSocket* ibvSock = &this->ibvsock;

   return IBVSocket_getDevice(ibvSock);
}
|
||||
|
||||
/**
 * Translate an RDMAKeyType into the corresponding IBVSocketKeyType.
 *
 * @return IBVSOCKETKEYTYPE_UnsafeGlobal for every value that is neither
 *    RDMAKEYTYPE_UnsafeDMA nor RDMAKEYTYPE_Register
 */
IBVSocketKeyType RDMASocket_toIBVSocketKeyType(RDMAKeyType keyType)
{
   if(keyType == RDMAKEYTYPE_UnsafeDMA)
      return IBVSOCKETKEYTYPE_UnsafeDMA;

   if(keyType == RDMAKEYTYPE_Register)
      return IBVSOCKETKEYTYPE_Register;

   return IBVSOCKETKEYTYPE_UnsafeGlobal;
}
|
||||
|
||||
/**
|
||||
* Note: Only has an effect for unconnected sockets.
|
||||
*/
|
||||
void RDMASocket_setBuffers(RDMASocket* this, unsigned bufNum, unsigned bufSize,
|
||||
unsigned fragmentSize, RDMAKeyType keyType)
|
||||
{
|
||||
this->commCfg.bufNum = bufNum;
|
||||
this->commCfg.bufSize = bufSize;
|
||||
this->commCfg.fragmentSize = fragmentSize;
|
||||
this->commCfg.keyType = RDMASocket_toIBVSocketKeyType(keyType);
|
||||
}
|
||||
|
||||
/**
 * Forward all timeout values (milliseconds, judging by the parameter names) unchanged to
 * IBVSocket_setTimeouts().
 */
void RDMASocket_setTimeouts(RDMASocket* this, int connectMS,
   int completionMS, int flowSendMS, int flowRecvMS, int pollMS)
{
   IBVSocket* ibvSock = &this->ibvsock;

   IBVSocket_setTimeouts(ibvSock, connectMS, completionMS, flowSendMS, flowRecvMS, pollMS);
}
|
||||
|
||||
/**
|
||||
* Note: Only has an effect for unconnected sockets.
|
||||
*/
|
||||
void RDMASocket_setTypeOfService(RDMASocket* this, int typeOfService)
|
||||
{
|
||||
IBVSocket_setTypeOfService(&this->ibvsock, typeOfService);
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: Only has an effect for unconnected sockets.
|
||||
*/
|
||||
void RDMASocket_setConnectionFailureStatus(RDMASocket* this, unsigned value)
|
||||
{
|
||||
IBVSocket_setConnectionFailureStatus(&this->ibvsock, value);
|
||||
}
|
||||
|
||||
/**
 * Register the given memory region through the underlying IBVSocket.
 *
 * @return true if IBVSocket_registerMr() returned zero, false for any non-zero result
 *    (NOTE(review): presumably zero means success there; confirm against IBVSocket)
 */
bool RDMASocket_registerMr(RDMASocket* this, struct ib_mr* mr, int access)
{
   return IBVSocket_registerMr(&this->ibvsock, mr, access) == 0;
}
|
||||
|
||||
#endif /*OPEN_RDMASOCKET_H_*/
|
||||
28
client_module/source/common/net/sock/Socket.c
Normal file
28
client_module/source/common/net/sock/Socket.c
Normal file
@@ -0,0 +1,28 @@
|
||||
#include <common/toolkit/SocketTk.h>
|
||||
#include <common/net/sock/Socket.h>
|
||||
#include <common/threading/Thread.h>
|
||||
#include <linux/in.h>
|
||||
|
||||
/**
 * Base class init: zero the whole Socket, then set the non-zero defaults.
 */
void _Socket_init(Socket* this)
{
   memset(this, 0, sizeof(*this) );

   this->boundPort = -1; // "not bound yet"
   this->sockType = NICADDRTYPE_STANDARD;
}
|
||||
|
||||
/**
 * Base class uninit; nothing to release at this level.
 */
void _Socket_uninit(Socket* this)
{
   // intentionally empty
}
|
||||
|
||||
|
||||
/**
 * Bind to the given port on INADDR_ANY via the virtual bindToAddr method.
 */
bool Socket_bind(Socket* this, unsigned short port)
{
   struct in_addr anyAddr = { .s_addr = INADDR_ANY };

   return this->ops->bindToAddr(this, anyAddr, port);
}
|
||||
|
||||
/**
 * Bind to the given address and port via the virtual bindToAddr method.
 */
bool Socket_bindToAddr(Socket* this, struct in_addr ipAddr, unsigned short port)
{
   const struct SocketOps* ops = this->ops;

   return ops->bindToAddr(this, ipAddr, port);
}
|
||||
194
client_module/source/common/net/sock/Socket.h
Normal file
194
client_module/source/common/net/sock/Socket.h
Normal file
@@ -0,0 +1,194 @@
|
||||
#ifndef SOCKET_H_
|
||||
#define SOCKET_H_
|
||||
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include <common/toolkit/Time.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/net/sock/NicAddress.h>
|
||||
#include <linux/socket.h>
|
||||
#include <os/iov_iter.h>
|
||||
|
||||
|
||||
#define SOCKET_PEERNAME_LEN 24
|
||||
|
||||
/*
|
||||
* This is an abstract class.
|
||||
*/
|
||||
|
||||
|
||||
struct Socket;
|
||||
typedef struct Socket Socket;
|
||||
|
||||
|
||||
extern void _Socket_init(Socket* this);
|
||||
extern void _Socket_uninit(Socket* this);
|
||||
|
||||
extern bool Socket_bind(Socket* this, unsigned short port);
|
||||
extern bool Socket_bindToAddr(Socket* this, struct in_addr ipAddr, unsigned short port);
|
||||
|
||||
|
||||
|
||||
struct SocketOps
|
||||
{
|
||||
void (*uninit)(Socket* this);
|
||||
|
||||
bool (*connectByIP)(Socket* this, struct in_addr ipaddress, unsigned short port);
|
||||
bool (*bindToAddr)(Socket* this, struct in_addr ipaddress, unsigned short port);
|
||||
bool (*listen)(Socket* this);
|
||||
bool (*shutdown)(Socket* this);
|
||||
bool (*shutdownAndRecvDisconnect)(Socket* this, int timeoutMS);
|
||||
|
||||
ssize_t (*sendto)(Socket* this, struct iov_iter* iter, int flags, fhgfs_sockaddr_in *to);
|
||||
ssize_t (*recvT)(Socket* this, struct iov_iter* iter, int flags, int timeoutMS);
|
||||
};
|
||||
|
||||
struct Socket
|
||||
{
|
||||
NicAddrType_t sockType;
|
||||
char peername[SOCKET_PEERNAME_LEN];
|
||||
struct in_addr peerIP;
|
||||
int boundPort;
|
||||
|
||||
const struct SocketOps* ops;
|
||||
|
||||
struct {
|
||||
struct list_head _list;
|
||||
short _events;
|
||||
short revents;
|
||||
} poll;
|
||||
};
|
||||
|
||||
|
||||
/**
 * @return the sockType stored in this socket
 */
static inline NicAddrType_t Socket_getSockType(Socket* this)
{
   return this->sockType;
}
|
||||
|
||||
/**
 * @return pointer to the internal peername buffer (not a copy; do not free)
 */
static inline char* Socket_getPeername(Socket* this)
{
   return this->peername;
}
|
||||
|
||||
/**
 * @return the peerIP stored in this socket (by value)
 */
static inline struct in_addr Socket_getPeerIP(Socket* this)
{
   return this->peerIP;
}
|
||||
|
||||
/**
|
||||
* Calls the virtual uninit method and kfrees the object.
|
||||
*/
|
||||
static inline void Socket_virtualDestruct(Socket* this)
|
||||
{
|
||||
this->ops->uninit(this);
|
||||
kfree(this);
|
||||
}
|
||||
|
||||
static inline ssize_t Socket_recvT(Socket* this, struct iov_iter *iter,
|
||||
size_t length, int flags, int timeoutMS)
|
||||
{
|
||||
// TODO: implementation function should accept length as well.
|
||||
struct iov_iter copy = *iter;
|
||||
iov_iter_truncate(©, length);
|
||||
|
||||
{
|
||||
ssize_t nread = this->ops->recvT(this, ©, flags, timeoutMS);
|
||||
|
||||
if (nread >= 0)
|
||||
{
|
||||
// TODO: currently some parts of the project expect that we advance
|
||||
// the iov_iter. But as it turns out, advancing here does not mesh
|
||||
// well with how iov_iter is supposed to be used. A problem can be
|
||||
// observed when advancing an iov_iter of type ITER_PIPE. This will
|
||||
// result in mutation of external state (struct pipe_inode_info). IOW
|
||||
// we can't just make a copy of any iov_iter and advance that in
|
||||
// isolation.
|
||||
//
|
||||
// That means, the code should be changed such that we advance only in
|
||||
// the outermost layers of the beegfs client module.
|
||||
|
||||
iov_iter_advance(iter, nread);
|
||||
}
|
||||
|
||||
return nread;
|
||||
}
|
||||
}
|
||||
|
||||
static inline ssize_t Socket_recvT_kernel(Socket* this, void *buffer,
|
||||
size_t length, int flags, int timeoutMS)
|
||||
{
|
||||
struct iov_iter *iter = STACK_ALLOC_BEEGFS_ITER_KVEC(buffer, length, READ);
|
||||
return this->ops->recvT(this, iter, flags, timeoutMS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Receive with timeout, extended version with numReceivedBeforeError.
|
||||
*
|
||||
* note: this uses a soft timeout that is being reset after each received data packet.
|
||||
*
|
||||
* @param outNumReceivedBeforeError number of bytes received before returning (also set in case of
|
||||
* an error, e.g. timeout); given value will only be increased and is intentionally not set to 0
|
||||
* initially.
|
||||
* @return -ETIMEDOUT on timeout.
|
||||
*/
|
||||
static inline ssize_t Socket_recvExactTEx(Socket* this, struct iov_iter *iter, size_t len, int flags, int timeoutMS,
|
||||
size_t* outNumReceivedBeforeError)
|
||||
{
|
||||
ssize_t missingLen = len;
|
||||
|
||||
do
|
||||
{
|
||||
ssize_t recvRes = this->ops->recvT(this, iter, flags, timeoutMS);
|
||||
|
||||
if(unlikely(recvRes <= 0) )
|
||||
return recvRes;
|
||||
|
||||
missingLen -= recvRes;
|
||||
*outNumReceivedBeforeError += recvRes;
|
||||
|
||||
} while(missingLen);
|
||||
|
||||
// all received if we got here
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
 * Plain kernel buffer variant of Socket_recvExactTEx(): wraps buf/len into a
 * stack-allocated kvec iter and forwards everything else unchanged.
 */
static inline ssize_t Socket_recvExactTEx_kernel(Socket* this, void *buf, size_t len, int flags, int timeoutMS,
   size_t* outNumReceivedBeforeError)
{
   struct iov_iter *kvecIter = STACK_ALLOC_BEEGFS_ITER_KVEC(buf, len, READ);
   ssize_t recvRes = Socket_recvExactTEx(this, kvecIter, len, flags, timeoutMS,
      outNumReceivedBeforeError);

   return recvRes;
}
|
||||
|
||||
/**
|
||||
* Receive with timeout.
|
||||
*
|
||||
* @return -ETIMEDOUT on timeout.
|
||||
*/
|
||||
static inline ssize_t Socket_recvExactT(Socket* this, struct iov_iter *iter, size_t len, int flags, int timeoutMS)
|
||||
{
|
||||
size_t numReceivedBeforeError;
|
||||
|
||||
return Socket_recvExactTEx(this, iter, len, flags, timeoutMS, &numReceivedBeforeError);
|
||||
}
|
||||
/**
 * Plain kernel buffer variant of Socket_recvExactT(): receive with timeout until len
 * bytes have arrived in buf.
 *
 * @return result of Socket_recvExactTEx_kernel()
 */
static inline ssize_t Socket_recvExactT_kernel(Socket* this, void *buf, size_t len, int flags, int timeoutMS)
{
   /* discarded, but must be initialized: the callee only ever adds to this counter, so
      an uninitialized value would be read there */
   size_t numReceivedBeforeError = 0;

   return Socket_recvExactTEx_kernel(this, buf, len, flags, timeoutMS, &numReceivedBeforeError);
}
|
||||
|
||||
|
||||
|
||||
static inline ssize_t Socket_sendto_kernel(Socket *this, const void *buf, size_t len, int flags,
|
||||
fhgfs_sockaddr_in *to)
|
||||
{
|
||||
struct iov_iter *iter = STACK_ALLOC_BEEGFS_ITER_KVEC(buf, len, WRITE);
|
||||
return this->ops->sendto(this, iter, flags, to);
|
||||
}
|
||||
|
||||
/**
 * Same as Socket_sendto_kernel() with a NULL destination address.
 */
static inline ssize_t Socket_send_kernel(Socket *this, const void *buf, size_t len, int flags)
{
   fhgfs_sockaddr_in* noDest = NULL;

   return Socket_sendto_kernel(this, buf, len, flags, noDest);
}
|
||||
|
||||
|
||||
#endif /*SOCKET_H_*/
|
||||
660
client_module/source/common/net/sock/StandardSocket.c
Normal file
660
client_module/source/common/net/sock/StandardSocket.c
Normal file
@@ -0,0 +1,660 @@
|
||||
#include <common/net/sock/StandardSocket.h>
|
||||
#include <common/toolkit/Serialization.h>
|
||||
#include <common/toolkit/SocketTk.h>
|
||||
#include <common/Common.h>
|
||||
|
||||
#include <linux/in.h>
|
||||
#include <linux/tcp.h>
|
||||
|
||||
|
||||
#define SOCKET_LISTEN_BACKLOG 32
|
||||
#define SOCKET_SHUTDOWN_RECV_BUF_LEN 32
|
||||
#define STANDARDSOCKET_CONNECT_TIMEOUT_MS 5000
|
||||
|
||||
static const struct SocketOps standardOps = {
|
||||
.uninit = _StandardSocket_uninit,
|
||||
|
||||
.connectByIP = _StandardSocket_connectByIP,
|
||||
.bindToAddr = _StandardSocket_bindToAddr,
|
||||
.listen = _StandardSocket_listen,
|
||||
.shutdown = _StandardSocket_shutdown,
|
||||
.shutdownAndRecvDisconnect = _StandardSocket_shutdownAndRecvDisconnect,
|
||||
|
||||
.sendto = _StandardSocket_sendto,
|
||||
.recvT = _StandardSocket_recvT,
|
||||
};
|
||||
|
||||
#ifdef KERNEL_HAS_SKWQ_HAS_SLEEPER
|
||||
# define __sock_has_sleeper(wq) (skwq_has_sleeper(wq))
|
||||
#else
|
||||
# define __sock_has_sleeper(wq) (wq_has_sleeper(wq))
|
||||
#endif
|
||||
|
||||
#if defined(KERNEL_HAS_SK_SLEEP) && !defined(KERNEL_HAS_SK_HAS_SLEEPER)
|
||||
static inline int sk_has_sleeper(struct sock* sk)
|
||||
{
|
||||
return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KERNEL_WAKE_UP_SYNC_KEY_HAS_3_ARGUMENTS)
|
||||
# define __wake_up_sync_key_m(wq, state, key) __wake_up_sync_key(wq, state, key)
|
||||
#else
|
||||
# define __wake_up_sync_key_m(wq, state, key) __wake_up_sync_key(wq, state, 1, key)
|
||||
#endif
|
||||
|
||||
|
||||
/* unlike linux sock_def_readable, this will also wake TASK_KILLABLE threads. we need this
|
||||
* for SocketTk_poll, which wants to wait for fatal signals only. */
|
||||
#ifdef KERNEL_HAS_SK_DATA_READY_2
|
||||
static void sock_readable(struct sock *sk, int len)
|
||||
#else
|
||||
static void sock_readable(struct sock *sk)
|
||||
#endif
|
||||
{
|
||||
#ifdef KERNEL_HAS_SK_SLEEP
|
||||
read_lock(&sk->sk_callback_lock);
|
||||
if (sk_has_sleeper(sk))
|
||||
{
|
||||
__wake_up_sync_key_m(sk->sk_sleep, TASK_NORMAL,
|
||||
(void*) (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND));
|
||||
}
|
||||
read_unlock(&sk->sk_callback_lock);
|
||||
#else
|
||||
struct socket_wq *wq;
|
||||
|
||||
rcu_read_lock();
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (__sock_has_sleeper(wq))
|
||||
{
|
||||
__wake_up_sync_key_m(&wq->wait, TASK_NORMAL,
|
||||
(void*) (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND));
|
||||
}
|
||||
rcu_read_unlock();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* sock_def_write_space will also not wake uninterruptible threads. additionally, in newer kernels
|
||||
* it uses refcount_t for an optimization we will do not need: linux does not want to wake up
|
||||
* many writers if many of them cannot make progress. we have only a single writer. */
|
||||
static void sock_write_space(struct sock *sk)
|
||||
{
|
||||
#ifdef KERNEL_HAS_SK_SLEEP
|
||||
read_lock(&sk->sk_callback_lock);
|
||||
|
||||
if (sk_has_sleeper(sk))
|
||||
{
|
||||
__wake_up_sync_key_m(sk->sk_sleep, TASK_NORMAL,
|
||||
(void*) (POLLOUT | POLLWRNORM | POLLWRBAND));
|
||||
}
|
||||
|
||||
read_unlock(&sk->sk_callback_lock);
|
||||
#else
|
||||
struct socket_wq *wq;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (__sock_has_sleeper(wq))
|
||||
__wake_up_sync_key_m(&wq->wait, TASK_NORMAL, (void*) (POLLOUT | POLLWRNORM | POLLWRBAND));
|
||||
|
||||
rcu_read_unlock();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* sock_def_wakeup, which is called for disconnects, has the same problem. */
|
||||
static void sock_wakeup(struct sock *sk)
|
||||
{
|
||||
#ifdef KERNEL_HAS_SK_SLEEP
|
||||
read_lock(&sk->sk_callback_lock);
|
||||
if (sk_has_sleeper(sk))
|
||||
wake_up_all(sk->sk_sleep);
|
||||
read_unlock(&sk->sk_callback_lock);
|
||||
#else
|
||||
struct socket_wq *wq;
|
||||
|
||||
rcu_read_lock();
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (__sock_has_sleeper(wq))
|
||||
wake_up_all(&wq->wait);
|
||||
rcu_read_unlock();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* as does sock_def_error_report */
|
||||
static void sock_error_report(struct sock *sk)
|
||||
{
|
||||
#ifdef KERNEL_HAS_SK_SLEEP
|
||||
read_lock(&sk->sk_callback_lock);
|
||||
if (sk_has_sleeper(sk))
|
||||
__wake_up_sync_key_m(sk->sk_sleep, TASK_NORMAL, (void*) (POLLERR));
|
||||
read_unlock(&sk->sk_callback_lock);
|
||||
#else
|
||||
struct socket_wq *wq;
|
||||
|
||||
rcu_read_lock();
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (__sock_has_sleeper(wq))
|
||||
__wake_up_sync_key_m(&wq->wait, TASK_NORMAL, (void*) (POLLERR));
|
||||
rcu_read_unlock();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/**
 * Initialize a StandardSocket in place: base class, ops table, then the kernel socket.
 *
 * @param domain also remembered in sockDomain
 * @return result of _StandardSocket_initSock()
 */
bool StandardSocket_init(StandardSocket* this, int domain, int type, int protocol)
{
   Socket* thisBase = (Socket*)this;

   // init super class
   _PooledSocket_init( (PooledSocket*)this, NICADDRTYPE_STANDARD);

   thisBase->ops = &standardOps;

   // normal init part
   this->sockDomain = domain;
   this->sock = NULL;

   return _StandardSocket_initSock(this, domain, type, protocol);
}
|
||||
|
||||
StandardSocket* StandardSocket_construct(int domain, int type, int protocol)
|
||||
{
|
||||
StandardSocket* this = kmalloc(sizeof(*this), GFP_NOFS);
|
||||
|
||||
if(!this ||
|
||||
!StandardSocket_init(this, domain, type, protocol) )
|
||||
{
|
||||
kfree(this);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
StandardSocket* StandardSocket_constructUDP(void)
|
||||
{
|
||||
return StandardSocket_construct(PF_INET, SOCK_DGRAM, 0);
|
||||
}
|
||||
|
||||
StandardSocket* StandardSocket_constructTCP(void)
|
||||
{
|
||||
return StandardSocket_construct(PF_INET, SOCK_STREAM, 0);
|
||||
}
|
||||
|
||||
/**
 * Uninit the PooledSocket base and release the kernel socket (if one was created).
 * Does not free the object itself.
 */
void _StandardSocket_uninit(Socket* this)
{
   StandardSocket* stdSock = (StandardSocket*)this;

   _PooledSocket_uninit(this);

   if(stdSock->sock != NULL)
   {
      sock_release(stdSock->sock);
   }
}
|
||||
|
||||
/**
 * Create the kernel socket, set its allocation mode to GFP_NOFS and install our own
 * wake-up callbacks.
 *
 * @return false if the socket could not be created (intentionally not logged)
 */
bool _StandardSocket_initSock(StandardSocket* this, int domain, int type, int protocol)
{
   struct sock* sk;
   int createRes;

   // prepare/create socket
#ifdef KERNEL_HAS_SOCK_CREATE_KERN_NS
   createRes = sock_create_kern(&init_net, domain, type, protocol, &this->sock);
#else
   createRes = sock_create_kern(domain, type, protocol, &this->sock);
#endif
   if(createRes < 0)
      return false;

   __StandardSocket_setAllocMode(this, GFP_NOFS);

   // replace the default callbacks with our variants
   sk = this->sock->sk;

   sk->sk_data_ready = sock_readable;
   sk->sk_write_space = sock_write_space;
   sk->sk_state_change = sock_wakeup;
   sk->sk_error_report = sock_error_report;

   return true;
}
|
||||
|
||||
/**
 * Set the sk_allocation flags of the underlying kernel socket.
 */
void __StandardSocket_setAllocMode(StandardSocket* this, gfp_t flags)
{
   struct sock* sk = this->sock->sk;

   sk->sk_allocation = flags;
}
|
||||
|
||||
|
||||
/**
|
||||
* Use this to change socket options.
|
||||
* Note: Behaves (almost) like user-space setsockopt.
|
||||
*
|
||||
* @return 0 on success, error code otherwise (=> different from userspace version)
|
||||
*/
|
||||
int _StandardSocket_setsockopt(StandardSocket* this, int level,
|
||||
int optname, char* optval, int optlen)
|
||||
{
|
||||
struct socket *sock = this->sock;
|
||||
|
||||
#if defined(KERNEL_HAS_SOCK_SETSOCKOPT_SOCKPTR_T_PARAM)
|
||||
|
||||
sockptr_t ptr = KERNEL_SOCKPTR(optval);
|
||||
|
||||
if (level == SOL_SOCKET)
|
||||
return sock_setsockopt(sock, level, optname, ptr, optlen);
|
||||
else
|
||||
return sock->ops->setsockopt(sock, level, optname, ptr, optlen);
|
||||
|
||||
#elif defined(KERNEL_HAS_GET_FS)
|
||||
|
||||
char __user *ptr = (char __user __force *) optval;
|
||||
int r;
|
||||
|
||||
WITH_PROCESS_CONTEXT
|
||||
if (level == SOL_SOCKET)
|
||||
r = sock_setsockopt(sock, level, optname, ptr, optlen);
|
||||
else
|
||||
r = sock->ops->setsockopt(sock, level, optname, ptr, optlen);
|
||||
return r;
|
||||
|
||||
#else
|
||||
#error need set_fs()/get_fs() if sockptr_t is not available.
|
||||
#endif
|
||||
|
||||
// unreachable
|
||||
BUG();
|
||||
}
|
||||
|
||||
/**
 * Enable/disable SO_KEEPALIVE.
 *
 * @return true if setsockopt returned 0
 */
bool StandardSocket_setSoKeepAlive(StandardSocket* this, bool enable)
{
   int optVal = enable ? 1 : 0;

   return _StandardSocket_setsockopt(this, SOL_SOCKET, SO_KEEPALIVE, (char*) &optVal,
      sizeof optVal) == 0;
}
|
||||
|
||||
/**
 * Enable/disable SO_BROADCAST.
 *
 * @return true if setsockopt returned 0
 */
bool StandardSocket_setSoBroadcast(StandardSocket* this, bool enable)
{
   int optVal = enable ? 1 : 0;

   return _StandardSocket_setsockopt(this, SOL_SOCKET, SO_BROADCAST, (char*) &optVal,
      sizeof optVal) == 0;
}
|
||||
|
||||
/**
 * @return the current sk_rcvbuf value of the kernel socket (plain, unsynchronized read)
 */
int StandardSocket_getSoRcvBuf(StandardSocket* this)
{
   struct sock* sk = this->sock->sk;

   //TODO: should this be READ_ONCE()? There are different uses in the Linux kernel
   return sk->sk_rcvbuf;
}
|
||||
|
||||
/**
|
||||
* Note: Increase only (buffer will not be set to a smaller value).
|
||||
*
|
||||
* @return false on error, true otherwise (decrease skipping is not an error)
|
||||
*/
|
||||
bool StandardSocket_setSoRcvBuf(StandardSocket* this, int size)
|
||||
{
|
||||
int origBufLen = StandardSocket_getSoRcvBuf(this);
|
||||
|
||||
if (origBufLen >= size)
|
||||
{
|
||||
// we don't decrease buf sizes (but this is not an error)
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* note: according to socket(7) man page, the value given to setsockopt()
|
||||
* is doubled and the doubled value is returned by getsockopt()
|
||||
*
|
||||
* update 2022-05-13: the kernel doubles the value passed to
|
||||
* setsockopt(SO_RCVBUF) to allow for bookkeeping overhead. Halving the
|
||||
* value is probably "not correct" but it's been this way since 2010 and
|
||||
* changing it will potentially do more harm than good at this point.
|
||||
*/
|
||||
|
||||
int val = size/2;
|
||||
|
||||
int r = _StandardSocket_setsockopt(this, SOL_SOCKET, SO_RCVBUF, (char *)
|
||||
&val, sizeof val);
|
||||
|
||||
if(r != 0)
|
||||
printk_fhgfs_debug(KERN_INFO, "%s: setSoRcvBuf error: %d;\n", __func__, r);
|
||||
|
||||
return r == 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Enable/disable TCP_NODELAY.
 *
 * @return true if setsockopt returned 0
 */
bool StandardSocket_setTcpNoDelay(StandardSocket* this, bool enable)
{
   int optVal = enable ? 1 : 0;

   return _StandardSocket_setsockopt(this, SOL_TCP, TCP_NODELAY, (char*) &optVal,
      sizeof optVal) == 0;
}
|
||||
|
||||
/**
 * Enable/disable TCP_CORK.
 *
 * @return true if setsockopt returned 0
 */
bool StandardSocket_setTcpCork(StandardSocket* this, bool enable)
{
   int optVal = enable ? 1 : 0;

   return _StandardSocket_setsockopt(this, SOL_TCP, TCP_CORK, (char*) &optVal,
      sizeof optVal) == 0;
}
|
||||
|
||||
/**
 * Non-blocking connect with a fixed timeout (STANDARDSOCKET_CONNECT_TIMEOUT_MS).
 *
 * If the connect does not complete immediately (-EINPROGRESS), polls the socket for
 * POLLOUT. On success, peername and peerIP are set unless peername is already set.
 *
 * note: error messages here would flood the log if hosts are unreachable on primary
 * interface, so all failures are silent.
 *
 * @return true if connected; false on connect error, poll error, poll timeout or if
 *    poll reported POLLERR/POLLHUP/POLLNVAL
 */
bool _StandardSocket_connectByIP(Socket* this, struct in_addr ipaddress, unsigned short port)
{
   const int timeoutMS = STANDARDSOCKET_CONNECT_TIMEOUT_MS;

   StandardSocket* stdSock = (StandardSocket*)this;

   struct sockaddr_in serveraddr =
   {
      .sin_family = AF_INET,
      .sin_addr = ipaddress,
      .sin_port = htons(port),
   };

   int connRes = kernel_connect(stdSock->sock, (struct sockaddr*) &serveraddr,
      sizeof(serveraddr), O_NONBLOCK);

   if(connRes == -EINPROGRESS)
   { // wait for "ready to send data"
      PollState state;
      int pollRes;

      PollState_init(&state);
      PollState_addSocket(&state, this, POLLOUT);

      pollRes = SocketTk_poll(&state, timeoutMS);

      if(pollRes <= 0)
         return false; // timeout (0) or connection error (<0)

      /* we got something (could also be an error).
         note: it's important to test ERR/HUP/NVAL here instead of POLLOUT only, because
         POLLOUT and POLLERR can be returned together. */
      if(this->poll.revents & (POLLERR | POLLHUP | POLLNVAL) )
         return false;
   }
   else
   if(connRes)
      return false; // immediate connect error

   // connection successfully established (immediately or after polling)

   // set peername if not done so already (e.g. by connect(hostname) )
   if(this->peername[0] == 0)
   {
      SocketTk_endpointAddrToStrNoAlloc(this->peername, SOCKET_PEERNAME_LEN, ipaddress, port);
      this->peerIP = ipaddress;
   }

   return true;
}
|
||||
|
||||
|
||||
/**
 * Bind the underlying kernel socket to the given local address and port.
 *
 * @param ipaddress local address, stored into sin_addr as given
 * @param port local port in host byte order (converted with htons() here)
 * @return true on success (boundPort is updated); false if kernel_bind() failed (a warning
 *    with the error code is logged)
 */
bool _StandardSocket_bindToAddr(Socket* this, struct in_addr ipaddress, unsigned short port)
{
   StandardSocket* thisCast = (StandardSocket*)this;

   /* designated initializer (same form as in _StandardSocket_connectByIP) zero-fills all
      members that are not named, so no uninitialized stack bytes (sin_zero padding) are
      handed to kernel_bind() */
   struct sockaddr_in bindAddr =
   {
      .sin_family = thisCast->sockDomain,
      .sin_addr = ipaddress,
      .sin_port = htons(port),
   };

   int bindRes = kernel_bind(thisCast->sock, (struct sockaddr*)&bindAddr, sizeof(bindAddr) );

   if(bindRes)
   {
      printk_fhgfs(KERN_WARNING, "Failed to bind socket. ErrCode: %d\n", bindRes);
      return false;
   }

   this->boundPort = port;

   return true;
}
|
||||
|
||||
/**
 * Switch the socket to listening mode with a backlog of SOCKET_LISTEN_BACKLOG.
 *
 * On success the peername is replaced by a "Listen(Port: ...)" label based on boundPort.
 *
 * @return true on success, false if kernel_listen() failed (a warning is logged)
 */
bool _StandardSocket_listen(Socket* this)
{
   StandardSocket* thisCast = (StandardSocket*)this;
   int listenRes = kernel_listen(thisCast->sock, SOCKET_LISTEN_BACKLOG);

   if(listenRes != 0)
   {
      printk_fhgfs(KERN_WARNING, "Failed to set socket to listening mode. ErrCode: %d\n",
         listenRes);
      return false;
   }

   snprintf(this->peername, SOCKET_PEERNAME_LEN, "Listen(Port: %d)", this->boundPort);

   return true;
}
|
||||
|
||||
/**
 * Shut down the sending direction of the socket.
 *
 * A socket that is not connected (-ENOTCONN) is not treated as an error.
 *
 * @return false only if kernel_sock_shutdown() failed with an error other than -ENOTCONN
 *    (a warning is logged in that case)
 */
bool _StandardSocket_shutdown(Socket* this)
{
   StandardSocket* thisCast = (StandardSocket*)this;
   int shutRes = kernel_sock_shutdown(thisCast->sock, SEND_SHUTDOWN);

   if( (shutRes >= 0) || (shutRes == -ENOTCONN) )
      return true;

   printk_fhgfs(KERN_WARNING, "Failed to send shutdown. ErrCode: %d\n", shutRes);

   return false;
}
|
||||
|
||||
/**
 * Shut down the socket via its ops table and then drain incoming data until the receive
 * call reports end-of-stream or an error.
 *
 * @param timeoutMS timeout passed to each individual receive call
 * @return true if the drain ended with 0 or -ECONNRESET; false if the shutdown failed or
 *    the drain ended with any other error
 */
bool _StandardSocket_shutdownAndRecvDisconnect(Socket* this, int timeoutMS)
{
   char buf[SOCKET_SHUTDOWN_RECV_BUF_LEN];
   int recvRes;

   if(!this->ops->shutdown(this) )
      return false;

   // receive (and discard) until shutdown arrives
   do
   {
      recvRes = Socket_recvT_kernel(this, buf, SOCKET_SHUTDOWN_RECV_BUF_LEN, 0, timeoutMS);
   } while(recvRes > 0);

   /* recvRes is <= 0 here: 0 and a connection reset both count as "peer disconnected";
      everything else is an error (but we're disconnecting, so we don't really care about
      errors beyond reporting them through the return value) */
   return (recvRes == 0) || (recvRes == -ECONNRESET);
}
|
||||
|
||||
|
||||
|
||||
/* Compatibility wrappers for sock_sendmsg / sock_recvmsg. At some point in the
|
||||
* 4.x series, the size argument disappeared. */
|
||||
static int beegfs_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags)
|
||||
{
|
||||
#ifdef KERNEL_HAS_RECVMSG_SIZE
|
||||
return sock_recvmsg(sock, msg, len, flags);
|
||||
#else
|
||||
return sock_recvmsg(sock, msg, flags);
|
||||
#endif
|
||||
}
|
||||
/**
 * Call sock_sendmsg() with whichever signature the kernel provides.
 *
 * NOTE(review): the signature is selected by KERNEL_HAS_RECVMSG_SIZE, i.e. the recvmsg
 * feature macro; presumably intentional for the supported kernels - confirm against the
 * build-time feature detection.
 *
 * @param len only passed on if KERNEL_HAS_RECVMSG_SIZE is defined, otherwise unused
 * @return the result of sock_sendmsg()
 */
static int beegfs_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
   int sendRes;

#ifdef KERNEL_HAS_RECVMSG_SIZE
   sendRes = sock_sendmsg(sock, msg, len);
#else
   sendRes = sock_sendmsg(sock, msg);
#endif

   return sendRes;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @return -ETIMEDOUT on timeout
|
||||
*/
|
||||
ssize_t _StandardSocket_recvT(Socket* this, struct iov_iter* iter, int flags, int timeoutMS)
|
||||
{
|
||||
StandardSocket* thisCast = (StandardSocket*)this;
|
||||
|
||||
return StandardSocket_recvfromT(thisCast, iter, flags, NULL, timeoutMS);
|
||||
}
|
||||
|
||||
|
||||
ssize_t _StandardSocket_sendto(Socket* this, struct iov_iter* iter, int flags,
|
||||
fhgfs_sockaddr_in *to)
|
||||
{
|
||||
StandardSocket* thisCast = (StandardSocket*)this;
|
||||
struct socket *sock = thisCast->sock;
|
||||
|
||||
int sendRes;
|
||||
size_t len;
|
||||
struct sockaddr_in toSockAddr;
|
||||
|
||||
struct msghdr msg =
|
||||
{
|
||||
.msg_control = NULL,
|
||||
.msg_controllen = 0,
|
||||
.msg_flags = flags | MSG_NOSIGNAL,
|
||||
.msg_name = (struct sockaddr*)(to ? &toSockAddr : NULL),
|
||||
.msg_namelen = sizeof(toSockAddr),
|
||||
.msg_iter = *iter,
|
||||
};
|
||||
|
||||
len = iov_iter_count(iter);
|
||||
|
||||
if (to)
|
||||
{
|
||||
toSockAddr.sin_family = thisCast->sockDomain;
|
||||
toSockAddr.sin_addr = to->addr;
|
||||
toSockAddr.sin_port = to->port;
|
||||
}
|
||||
|
||||
sendRes = beegfs_sendmsg(sock, &msg, len);
|
||||
|
||||
if(sendRes >= 0)
|
||||
iov_iter_advance(iter, sendRes);
|
||||
|
||||
return sendRes;
|
||||
}
|
||||
|
||||
ssize_t StandardSocket_recvfrom(StandardSocket* this, struct iov_iter* iter, int flags,
|
||||
fhgfs_sockaddr_in *from)
|
||||
{
|
||||
int recvRes;
|
||||
size_t len;
|
||||
struct sockaddr_in fromSockAddr;
|
||||
struct socket *sock = this->sock;
|
||||
|
||||
struct msghdr msg =
|
||||
{
|
||||
.msg_control = NULL,
|
||||
.msg_controllen = 0,
|
||||
.msg_flags = flags,
|
||||
.msg_name = (struct sockaddr*)&fromSockAddr,
|
||||
.msg_namelen = sizeof(fromSockAddr),
|
||||
.msg_iter = *iter,
|
||||
};
|
||||
|
||||
len = iov_iter_count(iter);
|
||||
|
||||
recvRes = beegfs_recvmsg(sock, &msg, len, flags);
|
||||
|
||||
if(recvRes > 0)
|
||||
iov_iter_advance(iter, recvRes);
|
||||
|
||||
if(from)
|
||||
{
|
||||
from->addr = fromSockAddr.sin_addr;
|
||||
from->port = fromSockAddr.sin_port;
|
||||
}
|
||||
|
||||
return recvRes;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return -ETIMEDOUT on timeout
|
||||
*/
|
||||
ssize_t StandardSocket_recvfromT(StandardSocket* this, struct iov_iter* iter, int flags,
|
||||
fhgfs_sockaddr_in *from, int timeoutMS)
|
||||
{
|
||||
Socket* thisBase = (Socket*)this;
|
||||
|
||||
int pollRes;
|
||||
PollState state;
|
||||
|
||||
if(timeoutMS < 0)
|
||||
return StandardSocket_recvfrom(this, iter, flags, from);
|
||||
|
||||
PollState_init(&state);
|
||||
PollState_addSocket(&state, thisBase, POLLIN);
|
||||
|
||||
pollRes = SocketTk_poll(&state, timeoutMS);
|
||||
|
||||
if( (pollRes > 0) && (thisBase->poll.revents & POLLIN) )
|
||||
return StandardSocket_recvfrom(this, iter, flags, from);
|
||||
|
||||
if(!pollRes)
|
||||
return -ETIMEDOUT;
|
||||
|
||||
if(thisBase->poll.revents & POLLERR)
|
||||
printk_fhgfs_debug(KERN_DEBUG, "StandardSocket_recvfromT: poll(): %s: Error condition\n",
|
||||
thisBase->peername);
|
||||
else
|
||||
if(thisBase->poll.revents & POLLHUP)
|
||||
printk_fhgfs_debug(KERN_DEBUG, "StandardSocket_recvfromT: poll(): %s: Hung up\n",
|
||||
thisBase->peername);
|
||||
else
|
||||
if(thisBase->poll.revents & POLLNVAL)
|
||||
printk_fhgfs(KERN_DEBUG, "StandardSocket_recvfromT: poll(): %s: Invalid request\n",
|
||||
thisBase->peername);
|
||||
else
|
||||
printk_fhgfs(KERN_DEBUG, "StandardSocket_recvfromT: poll(): %s: ErrCode: %d\n",
|
||||
thisBase->peername, pollRes);
|
||||
|
||||
return -ECOMM;
|
||||
}
|
||||
68
client_module/source/common/net/sock/StandardSocket.h
Normal file
68
client_module/source/common/net/sock/StandardSocket.h
Normal file
@@ -0,0 +1,68 @@
|
||||
#ifndef OPEN_STANDARDSOCKET_H_
|
||||
#define OPEN_STANDARDSOCKET_H_
|
||||
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include <common/toolkit/Time.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/net/sock/PooledSocket.h>
|
||||
|
||||
|
||||
struct StandardSocket;
|
||||
typedef struct StandardSocket StandardSocket;
|
||||
|
||||
|
||||
extern __must_check bool StandardSocket_init(StandardSocket* this, int domain, int type,
|
||||
int protocol);
|
||||
extern StandardSocket* StandardSocket_construct(int domain, int type, int protocol);
|
||||
extern StandardSocket* StandardSocket_constructUDP(void);
|
||||
extern StandardSocket* StandardSocket_constructTCP(void);
|
||||
extern void _StandardSocket_uninit(Socket* this);
|
||||
|
||||
int StandardSocket_getSoRcvBuf(StandardSocket* this);
|
||||
extern bool StandardSocket_setSoKeepAlive(StandardSocket* this, bool enable);
|
||||
extern bool StandardSocket_setSoBroadcast(StandardSocket* this, bool enable);
|
||||
extern bool StandardSocket_setSoRcvBuf(StandardSocket* this, int size);
|
||||
extern bool StandardSocket_setTcpNoDelay(StandardSocket* this, bool enable);
|
||||
extern bool StandardSocket_setTcpCork(StandardSocket* this, bool enable);
|
||||
|
||||
extern bool _StandardSocket_connectByIP(Socket* this, struct in_addr ipaddress,
|
||||
unsigned short port);
|
||||
extern bool _StandardSocket_bindToAddr(Socket* this, struct in_addr ipaddress,
|
||||
unsigned short port);
|
||||
extern bool _StandardSocket_listen(Socket* this);
|
||||
extern bool _StandardSocket_shutdown(Socket* this);
|
||||
extern bool _StandardSocket_shutdownAndRecvDisconnect(Socket* this, int timeoutMS);
|
||||
|
||||
extern ssize_t _StandardSocket_recvT(Socket* this, struct iov_iter* iter, int flags,
|
||||
int timeoutMS);
|
||||
extern ssize_t _StandardSocket_sendto(Socket* this, struct iov_iter* iter, int flags,
|
||||
fhgfs_sockaddr_in *to);
|
||||
|
||||
extern ssize_t StandardSocket_recvfrom(StandardSocket* this, struct iov_iter* iter,
|
||||
int flags, fhgfs_sockaddr_in *from);
|
||||
extern ssize_t StandardSocket_recvfromT(StandardSocket* this, struct iov_iter* iter,
|
||||
int flags, fhgfs_sockaddr_in *from, int timeoutMS);
|
||||
|
||||
extern bool _StandardSocket_initSock(StandardSocket* this, int domain, int type,
|
||||
int protocol);
|
||||
extern void __StandardSocket_setAllocMode(StandardSocket* this, gfp_t flags);
|
||||
extern int _StandardSocket_setsockopt(StandardSocket* this, int level, int optname, char* optval,
|
||||
int optlen);
|
||||
|
||||
// getters & setters
|
||||
static inline struct socket* StandardSocket_getRawSock(StandardSocket* this);
|
||||
|
||||
struct StandardSocket
|
||||
{
|
||||
PooledSocket pooledSocket;
|
||||
struct socket* sock;
|
||||
unsigned short sockDomain;
|
||||
};
|
||||
|
||||
/**
 * @return the underlying kernel socket of this instance
 */
static inline struct socket* StandardSocket_getRawSock(StandardSocket* this)
{
   // specifiers repeated from the forward declaration above (same linkage, just explicit)
   return this->sock;
}
|
||||
|
||||
|
||||
#endif /*OPEN_STANDARDSOCKET_H_*/
|
||||
152
client_module/source/common/net/sock/ibv/IBVBuffer.c
Normal file
152
client_module/source/common/net/sock/ibv/IBVBuffer.c
Normal file
@@ -0,0 +1,152 @@
|
||||
|
||||
|
||||
#include "IBVBuffer.h"
|
||||
#include "IBVSocket.h"
|
||||
#ifdef BEEGFS_RDMA
|
||||
#include <rdma/ib_verbs.h>
|
||||
|
||||
|
||||
/**
 * Allocate and DMA-map the memory of a buffer, optionally split into fragments.
 *
 * The buffer consists of ceil(bufLen / fragmentLen) separately allocated fragments of
 * MIN(fragmentLen, bufLen) bytes each; every fragment gets its own ib_sge entry.
 *
 * @param bufLen total requested buffer length
 * @param fragmentLen length of a single fragment; 0 means "do not fragment"
 * @param dma_dir DMA direction used for mapping (and later unmapping) the fragments
 * @return true on success; false on failure, in which case everything allocated so far has
 *    been released and the buffer is left in an empty state (NULL pointers, zero counts)
 */
bool IBVBuffer_init(IBVBuffer* buffer, IBVCommContext* ctx, size_t bufLen,
   size_t fragmentLen, enum dma_data_direction dma_dir)
{
   unsigned count;
   unsigned i;

   /* start from a defined empty state, so that the cleanup on the failure path only ever
      sees values that were set by this function */
   buffer->buffers = NULL;
   buffer->lists = NULL;
   buffer->mr = NULL;
   buffer->bufferSize = 0;
   buffer->bufferCount = 0;
   buffer->listLength = 0;
   buffer->dma_dir = dma_dir;

   if (fragmentLen == 0)
      fragmentLen = bufLen;

   if (fragmentLen == 0)
      return false; // bufLen is 0 as well; the division below would be a division by zero

   count = (bufLen + fragmentLen - 1) / fragmentLen;
   bufLen = MIN(fragmentLen, bufLen);

   // kcalloc zeroes the arrays and checks the count * size multiplication for overflow
   buffer->buffers = kcalloc(count, sizeof(*buffer->buffers), GFP_KERNEL);
   buffer->lists = kcalloc(count, sizeof(*buffer->lists), GFP_KERNEL);
   if(!buffer->buffers || !buffer->lists)
      goto fail;

   /* set size and counts before filling the arrays: IBVBuffer_free() walks bufferCount
      entries and skips zeroed slots, so a failure in the middle of the loop below still
      releases all fragments that were allocated/mapped up to that point */
   buffer->bufferSize = bufLen;
   buffer->listLength = count;
   buffer->bufferCount = count;

   for(i = 0; i < count; i++)
   {
      buffer->lists[i].lkey = ctx->pd->local_dma_lkey;
      buffer->lists[i].length = bufLen;

      buffer->buffers[i] = kmalloc(bufLen, GFP_KERNEL);
      if(unlikely(!buffer->buffers[i]))
      {
         printk_fhgfs(KERN_ERR, "Failed to allocate buffer size=%zu\n", bufLen);
         goto fail;
      }

      buffer->lists[i].addr = ib_dma_map_single(ctx->pd->device, buffer->buffers[i],
         bufLen, dma_dir);
      if (unlikely(ib_dma_mapping_error(ctx->pd->device, buffer->lists[i].addr)))
      {
         buffer->lists[i].addr = 0; // addr 0 marks "not mapped" for IBVBuffer_free()
         printk_fhgfs(KERN_ERR, "Failed to dma map buffer size=%zu\n", bufLen);
         goto fail;
      }

      // a valid mapping at address 0 could not be told apart from "not mapped"
      BUG_ON(buffer->lists[i].addr == 0);
   }

   return true;

fail:
   IBVBuffer_free(buffer, ctx);

   /* leave no dangling pointers behind, so that another IBVBuffer_free() on this buffer
      (e.g. by the owner's general cleanup) is harmless */
   buffer->buffers = NULL;
   buffer->lists = NULL;
   buffer->mr = NULL;
   buffer->bufferSize = 0;
   buffer->bufferCount = 0;
   buffer->listLength = 0;

   return false;
}
|
||||
|
||||
|
||||
/**
 * Allocate the buffer's memory region (ib_mr) and map the already DMA-mapped fragments
 * into it.
 *
 * Uses buffer->bufferCount and buffer->lists, so the buffer must have been set up by
 * IBVBuffer_init() before.
 *
 * @return true on success; false on failure, in which case buffer->mr is NULL
 */
bool IBVBuffer_initRegistration(IBVBuffer* buffer, IBVCommContext* ctx)
{
   struct scatterlist* sg;
   int res;
   unsigned i;

   buffer->mr = ib_alloc_mr(ctx->pd, IB_MR_TYPE_MEM_REG, buffer->bufferCount);
   if (IS_ERR(buffer->mr))
   {
      printk_fhgfs(KERN_ERR, "Failed to alloc mr, errCode=%ld\n", PTR_ERR(buffer->mr));
      buffer->mr = NULL;
      goto fail;
   }

   // kcalloc checks the count * size multiplication for overflow
   sg = kcalloc(buffer->bufferCount, sizeof(*sg), GFP_KERNEL);
   if (sg == NULL)
   {
      printk_fhgfs(KERN_ERR, "Failed to alloc sg\n");
      goto fail;
   }

   /* a scatterlist array has to be initialized as a table: this sets the end marker on the
      last entry (which list iteration relies on) and the debug magic. sg_init_table()
      touches entry [nents - 1], so it must not be called for an empty table. */
   if (buffer->bufferCount > 0)
      sg_init_table(sg, buffer->bufferCount);

   // the fragments are DMA-mapped already, so only the DMA address/length are filled in
   for (i = 0; i < buffer->bufferCount; ++i)
   {
      sg_dma_address(&sg[i]) = buffer->lists[i].addr;
      sg_dma_len(&sg[i]) = buffer->lists[i].length;
   }

   res = ib_map_mr_sg(buffer->mr, sg, buffer->bufferCount, NULL, PAGE_SIZE);
   kfree(sg);

   /* NOTE(review): only negative results are treated as failure here; a result smaller
      than bufferCount (partial mapping) is accepted - confirm that this is intended. */
   if (res < 0)
   {
      printk_fhgfs(KERN_ERR, "Failed to map mr res=%d\n", res);
      goto fail;
   }

   return true;

fail:
   if (buffer->mr)
   {
      ib_dereg_mr(buffer->mr);
      buffer->mr = NULL;
   }

   return false;
}
|
||||
|
||||
|
||||
/**
 * Release everything that belongs to the buffer: DMA mappings, fragment memory, the memory
 * region (if any) and the bookkeeping arrays.
 *
 * Afterwards the buffer is reset to an empty state, so calling this function again on the
 * same buffer is a no-op instead of a double free. (IBVBuffer_init() itself calls this on
 * its failure path.)
 */
void IBVBuffer_free(IBVBuffer* buffer, IBVCommContext* ctx)
{
   if(buffer->buffers && buffer->lists)
   {
      unsigned i;

      for(i = 0; i < buffer->bufferCount; i++)
      {
         // addr 0 means this fragment was never (successfully) mapped
         if (buffer->lists[i].addr)
            ib_dma_unmap_single(ctx->pd->device, buffer->lists[i].addr,
               buffer->bufferSize, buffer->dma_dir);

         kfree(buffer->buffers[i]); // kfree(NULL) is a no-op
      }
   }

   if (buffer->mr)
      ib_dereg_mr(buffer->mr);

   kfree(buffer->buffers);
   kfree(buffer->lists);

   // reset, so that stale pointers/counts can neither be used nor freed a second time
   buffer->buffers = NULL;
   buffer->lists = NULL;
   buffer->mr = NULL;
   buffer->bufferCount = 0;
   buffer->listLength = 0;
}
|
||||
|
||||
/**
 * Copy data from iter into the buffer, fragment by fragment, until either iter is drained
 * or all fragments are used.
 *
 * For every fragment that received data, the length of its ib_sge entry is set to the
 * number of bytes copied into it, and listLength is set to the number of fragments used so
 * far. If iter is empty on entry, nothing is modified.
 *
 * @return total number of bytes copied, or -EFAULT if copying from iter came up short
 */
ssize_t IBVBuffer_fill(IBVBuffer* buffer, struct iov_iter* iter)
{
   ssize_t copied = 0;
   unsigned idx = 0;

   while( (idx < buffer->bufferCount) && (iov_iter_count(iter) > 0) )
   {
      // limited by the remaining data, the fragment size and the 32 bit range
      size_t chunk = MIN(MIN(iov_iter_count(iter), buffer->bufferSize), 0xFFFFFFFF);

      if(copy_from_iter(buffer->buffers[idx], chunk, iter) != chunk)
         return -EFAULT;

      buffer->lists[idx].length = chunk;

      idx++;
      buffer->listLength = idx;

      copied += chunk;
   }

   return copied;
}
|
||||
|
||||
#endif
|
||||
49
client_module/source/common/net/sock/ibv/IBVBuffer.h
Normal file
49
client_module/source/common/net/sock/ibv/IBVBuffer.h
Normal file
@@ -0,0 +1,49 @@
|
||||
#ifndef IBVBuffer_h_aMQFNfzrjbEHDOcv216fi
|
||||
#define IBVBuffer_h_aMQFNfzrjbEHDOcv216fi
|
||||
|
||||
#include <common/Common.h>
|
||||
#ifdef BEEGFS_RDMA
|
||||
|
||||
#include <rdma/ib_verbs.h>
|
||||
#include <rdma/rdma_cm.h>
|
||||
#include <rdma/ib_cm.h>
|
||||
|
||||
#include <os/iov_iter.h>
|
||||
|
||||
|
||||
struct IBVBuffer;
|
||||
typedef struct IBVBuffer IBVBuffer;
|
||||
|
||||
struct IBVCommContext;
|
||||
struct IBVSocket;
|
||||
|
||||
|
||||
extern bool IBVBuffer_init(IBVBuffer* buffer, struct IBVCommContext* ctx, size_t bufLen,
|
||||
size_t fragmentLen, enum dma_data_direction dma_dir);
|
||||
/**
|
||||
* Prepare the instance to use its internal ib_mr. This is only needed for buffers used
|
||||
* with RDMA READ/WRITE and when not using a global rkey. This may be called before
|
||||
* the connection is established. Once the connection has been established,
|
||||
* the registration must be completed via a call to IBVSocket_registerMr().
|
||||
*/
|
||||
extern bool IBVBuffer_initRegistration(IBVBuffer* buffer, struct IBVCommContext* ctx);
|
||||
extern void IBVBuffer_free(IBVBuffer* buffer, struct IBVCommContext* ctx);
|
||||
extern ssize_t IBVBuffer_fill(IBVBuffer* buffer, struct iov_iter* iter);
|
||||
|
||||
|
||||
struct IBVBuffer
|
||||
{
|
||||
char** buffers;
|
||||
struct ib_sge* lists;
|
||||
struct ib_mr* mr;
|
||||
|
||||
size_t bufferSize;
|
||||
unsigned bufferCount;
|
||||
|
||||
unsigned listLength;
|
||||
enum dma_data_direction dma_dir;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
2092
client_module/source/common/net/sock/ibv/IBVSocket.c
Normal file
2092
client_module/source/common/net/sock/ibv/IBVSocket.c
Normal file
File diff suppressed because it is too large
Load Diff
283
client_module/source/common/net/sock/ibv/IBVSocket.h
Normal file
283
client_module/source/common/net/sock/ibv/IBVSocket.h
Normal file
@@ -0,0 +1,283 @@
|
||||
#ifndef OPENTK_IBVSOCKET_H_
|
||||
#define OPENTK_IBVSOCKET_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <common/toolkit/Random.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/inet.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/wait.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/inet_common.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <os/iov_iter.h>
|
||||
|
||||
|
||||
#define IBVSOCKET_PRIVATEDATA_STR "fhgfs0 " // must be exactly(!!) 8 bytes long
|
||||
#define IBVSOCKET_PRIVATEDATA_STR_LEN 8
|
||||
#define IBVSOCKET_PRIVATEDATA_PROTOCOL_VER 1
|
||||
|
||||
struct ib_device;
|
||||
struct ib_mr;
|
||||
|
||||
struct IBVIncompleteRecv;
|
||||
typedef struct IBVIncompleteRecv IBVIncompleteRecv;
|
||||
struct IBVIncompleteSend;
|
||||
typedef struct IBVIncompleteSend IBVIncompleteSend;
|
||||
|
||||
struct IBVCommContext;
|
||||
typedef struct IBVCommContext IBVCommContext;
|
||||
|
||||
struct IBVCommDest;
|
||||
typedef struct IBVCommDest IBVCommDest;
|
||||
|
||||
struct IBVTimeoutConfig;
|
||||
typedef struct IBVTimeoutConfig IBVTimeoutConfig;
|
||||
|
||||
struct IBVSocket; // forward declaration
|
||||
typedef struct IBVSocket IBVSocket;
|
||||
|
||||
struct IBVCommConfig;
|
||||
typedef struct IBVCommConfig IBVCommConfig;
|
||||
|
||||
struct NicAddressStats;
|
||||
typedef struct NicAddressStats NicAddressStats;
|
||||
|
||||
/**
 * Selects which type of rkey is used for RDMA (see IBVCommConfig.keyType).
 */
enum IBVSocketKeyType
{
   IBVSOCKETKEYTYPE_UnsafeGlobal = 0,
   IBVSOCKETKEYTYPE_UnsafeDMA    = 1,
   IBVSOCKETKEYTYPE_Register     = 2
};
|
||||
typedef enum IBVSocketKeyType IBVSocketKeyType;
|
||||
|
||||
// construction/destruction
|
||||
extern __must_check bool IBVSocket_init(IBVSocket* _this, struct in_addr srcIpAddr, NicAddressStats* nicStats);
|
||||
extern void IBVSocket_uninit(IBVSocket* _this);
|
||||
|
||||
// static
|
||||
extern bool IBVSocket_rdmaDevicesExist(void);
|
||||
|
||||
// methods
|
||||
extern bool IBVSocket_connectByIP(IBVSocket* _this, struct in_addr ipaddress,
|
||||
unsigned short port, IBVCommConfig* commCfg);
|
||||
extern bool IBVSocket_bindToAddr(IBVSocket* _this, struct in_addr ipAddr,
|
||||
unsigned short port);
|
||||
extern bool IBVSocket_listen(IBVSocket* _this);
|
||||
extern bool IBVSocket_shutdown(IBVSocket* _this);
|
||||
|
||||
extern ssize_t IBVSocket_recvT(IBVSocket* _this, struct iov_iter* iter, int flags,
|
||||
int timeoutMS);
|
||||
extern ssize_t IBVSocket_send(IBVSocket* _this, struct iov_iter* iter, int flags);
|
||||
|
||||
extern int IBVSocket_checkConnection(IBVSocket* _this);
|
||||
|
||||
extern unsigned long IBVSocket_poll(IBVSocket* _this, short events, bool finishPoll);
|
||||
|
||||
// getters & setters
|
||||
extern void IBVSocket_setTimeouts(IBVSocket* _this, int connectMS,
|
||||
int completionMS, int flowSendMS, int flowRecvMS, int pollMS);
|
||||
extern void IBVSocket_setTypeOfService(IBVSocket* _this, int typeOfService);
|
||||
extern void IBVSocket_setConnectionFailureStatus(IBVSocket* _this, unsigned value);
|
||||
extern struct in_addr IBVSocket_getSrcIpAddr(IBVSocket* _this);
|
||||
|
||||
// Only access members of NicAddressStats when the owner NodeConnPool mutex is held.
|
||||
// OK to access "nic" without holding mutex.
|
||||
extern NicAddressStats* IBVSocket_getNicStats(IBVSocket* _this);
|
||||
|
||||
extern unsigned IBVSocket_getRkey(IBVSocket* _this);
|
||||
extern struct ib_device* IBVSocket_getDevice(IBVSocket* _this);
|
||||
extern int IBVSocket_registerMr(IBVSocket* _this, struct ib_mr* mr, int access);
|
||||
|
||||
/**
 * Timeouts of an IBVSocket, all in milliseconds (see IBVSocket_setTimeouts() ).
 */
struct IBVTimeoutConfig
{
   int connectMS;    // connect timeout
   int completionMS; // completion timeout
   int flowSendMS;   // flow control timeout, send side
   int flowRecvMS;   // flow control timeout, receive side
   int pollMS;       // poll timeout
};
|
||||
|
||||
struct IBVCommConfig
|
||||
{
|
||||
unsigned bufNum; // number of available buffers
|
||||
unsigned bufSize; // total size of each buffer
|
||||
/**
|
||||
* IBVBuffer can allocate the buffer in multiple memory regions. This
|
||||
* is to allow allocation of large buffers without requiring the
|
||||
* buffer to be entirely contiguous. A value of 0 means that the
|
||||
* buffer should not be fragmented.
|
||||
*/
|
||||
unsigned fragmentSize; // size of buffer fragments
|
||||
IBVSocketKeyType keyType; // Which type of rkey for RDMA
|
||||
};
|
||||
|
||||
#ifdef BEEGFS_RDMA
|
||||
#include <rdma/ib_verbs.h>
|
||||
#include <rdma/rdma_cm.h>
|
||||
#include <rdma/ib_cm.h>
|
||||
#include <common/threading/Mutex.h>
|
||||
#include "IBVBuffer.h"
|
||||
|
||||
|
||||
enum IBVSocketConnState;
|
||||
typedef enum IBVSocketConnState IBVSocketConnState_t;
|
||||
|
||||
|
||||
extern bool __IBVSocket_createNewID(IBVSocket* _this);
|
||||
extern bool __IBVSocket_createCommContext(IBVSocket* _this, struct rdma_cm_id* cm_id,
|
||||
IBVCommConfig* commCfg, IBVCommContext** outCommContext);
|
||||
extern void __IBVSocket_cleanupCommContext(struct rdma_cm_id* cm_id, IBVCommContext* commContext);
|
||||
|
||||
extern bool __IBVSocket_initCommDest(IBVCommContext* commContext, IBVCommDest* outDest);
|
||||
extern bool __IBVSocket_parseCommDest(const void* buf, size_t bufLen, IBVCommDest** outDest);
|
||||
|
||||
extern int __IBVSocket_receiveCheck(IBVSocket* _this, int timeoutMS);
|
||||
extern int __IBVSocket_nonblockingSendCheck(IBVSocket* _this);
|
||||
|
||||
extern int __IBVSocket_postRecv(IBVSocket* _this, IBVCommContext* commContext, size_t bufIndex);
|
||||
extern int __IBVSocket_postSend(IBVSocket* _this, size_t bufIndex);
|
||||
extern int __IBVSocket_recvWC(IBVSocket* _this, int timeoutMS, struct ib_wc* outWC);
|
||||
|
||||
extern int __IBVSocket_flowControlOnRecv(IBVSocket* _this, int timeoutMS);
|
||||
extern void __IBVSocket_flowControlOnSendUpdateCounters(IBVSocket* _this);
|
||||
extern int __IBVSocket_flowControlOnSendWait(IBVSocket* _this, int timeoutMS);
|
||||
|
||||
extern int __IBVSocket_waitForRecvCompletionEvent(IBVSocket* _this, int timeoutMS,
|
||||
struct ib_wc* outWC);
|
||||
extern int __IBVSocket_waitForSendCompletionEvent(IBVSocket* _this, int oldSendCount,
|
||||
int timeoutMS);
|
||||
extern int __IBVSocket_waitForTotalSendCompletion(IBVSocket* _this,
|
||||
unsigned* numSendElements, unsigned* numWriteElements, unsigned* numReadElements, int timeoutMS);
|
||||
|
||||
extern ssize_t __IBVSocket_recvContinueIncomplete(IBVSocket* _this, struct iov_iter* iter);
|
||||
|
||||
extern int __IBVSocket_cmaHandler(struct rdma_cm_id* cm_id, struct rdma_cm_event* event);
|
||||
extern void __IBVSocket_cqSendEventHandler(struct ib_event* event, void* data);
|
||||
extern void __IBVSocket_sendCompletionHandler(struct ib_cq* cq, void* cq_context);
|
||||
extern void __IBVSocket_cqRecvEventHandler(struct ib_event* event, void* data);
|
||||
extern void __IBVSocket_recvCompletionHandler(struct ib_cq* cq, void* cq_context);
|
||||
extern void __IBVSocket_qpEventHandler(struct ib_event* event, void* data);
|
||||
extern int __IBVSocket_routeResolvedHandler(IBVSocket* _this, struct rdma_cm_id* cm_id,
|
||||
IBVCommConfig* commCfg, IBVCommContext** outCommContext);
|
||||
extern int __IBVSocket_connectedHandler(IBVSocket* _this, struct rdma_cm_event *event);
|
||||
|
||||
extern struct ib_cq* __IBVSocket_createCompletionQueue(struct ib_device* device,
|
||||
ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *),
|
||||
void* cq_context, int cqe);
|
||||
|
||||
extern const char* __IBVSocket_wcStatusStr(int wcStatusCode);
|
||||
|
||||
|
||||
/**
 * Connection states of an IBVSocket (see IBVSocket.connState). The numeric values are
 * assigned explicitly.
 */
enum IBVSocketConnState
{
   IBVSOCKETCONNSTATE_UNCONNECTED     = 0,
   IBVSOCKETCONNSTATE_CONNECTING      = 1,
   IBVSOCKETCONNSTATE_ADDRESSRESOLVED = 2,
   IBVSOCKETCONNSTATE_ROUTERESOLVED   = 3,
   IBVSOCKETCONNSTATE_ESTABLISHED     = 4,
   IBVSOCKETCONNSTATE_FAILED          = 5,
   IBVSOCKETCONNSTATE_REJECTED_STALE  = 6
};
|
||||
|
||||
|
||||
/**
 * Bookkeeping for a receive that has not been completely consumed yet.
 * NOTE(review): field meanings below are inferred from the names - confirm in IBVSocket.c.
 */
struct IBVIncompleteRecv
{
   int isAvailable;     // presumably non-zero if an incomplete receive is pending
   int completedOffset; // presumably the offset up to which the data was consumed
   int bufIndex;        // presumably the index of the receive buffer that holds the data
   int totalSize;       // presumably the total size of the received data
};
|
||||
|
||||
/**
 * Bookkeeping for sends whose completions have not all been received yet.
 */
struct IBVIncompleteSend
{
   unsigned numAvailable;

   /* true if we received only some completions and need to wait for the rest before we can
      send more data */
   bool forceWaitForAll;
};
|
||||
|
||||
struct IBVCommContext
|
||||
{
|
||||
struct ib_pd* pd; // protection domain
|
||||
struct ib_mr* dmaMR; // system DMA MR. Not supported on all platforms.
|
||||
atomic_t recvCompEventCount; // incremented on incoming event notification
|
||||
wait_queue_head_t recvCompWaitQ; // for recvCompEvents
|
||||
wait_queue_t recvWait;
|
||||
bool recvWaitInitialized; // true if init_wait was called for the thread
|
||||
atomic_t sendCompEventCount; // incremented on incoming event notification
|
||||
wait_queue_head_t sendCompWaitQ; // for sendCompEvents
|
||||
wait_queue_t sendWait;
|
||||
bool sendWaitInitialized; // true if init_wait was called for the thread
|
||||
|
||||
struct ib_cq* recvCQ; // recv completion queue
|
||||
struct ib_cq* sendCQ; // send completion queue
|
||||
struct ib_qp* qp; // send+recv queue pair
|
||||
|
||||
IBVCommConfig commCfg;
|
||||
struct IBVBuffer* sendBufs;
|
||||
struct IBVBuffer* recvBufs;
|
||||
struct IBVBuffer checkConBuffer;
|
||||
unsigned numReceivedBufsLeft; // flow control v2 to avoid IB rnr timeout
|
||||
unsigned numSendBufsLeft; // flow control v2 to avoid IB rnr timeout
|
||||
|
||||
IBVIncompleteRecv incompleteRecv;
|
||||
IBVIncompleteSend incompleteSend;
|
||||
u32 checkConnRkey;
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
// Note: Make sure this struct has the same size on all architectures (because we use
|
||||
// sizeof(IBVCommDest) for private_data during handshake)
|
||||
struct IBVCommDest
|
||||
{
|
||||
char verificationStr[IBVSOCKET_PRIVATEDATA_STR_LEN];
|
||||
uint64_t protocolVersion;
|
||||
uint64_t vaddr;
|
||||
unsigned rkey;
|
||||
unsigned recvBufNum;
|
||||
unsigned recvBufSize;
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
struct IBVSocket
|
||||
{
|
||||
wait_queue_head_t eventWaitQ; // used to wait for connState change during connect
|
||||
|
||||
|
||||
struct rdma_cm_id* cm_id;
|
||||
struct in_addr srcIpAddr;
|
||||
|
||||
IBVCommDest localDest;
|
||||
IBVCommDest* remoteDest;
|
||||
|
||||
IBVCommContext* commContext;
|
||||
|
||||
int errState; // 0 = <no error>; -1 = <unspecified error>
|
||||
|
||||
volatile IBVSocketConnState_t connState;
|
||||
|
||||
int typeOfService;
|
||||
unsigned remapConnectionFailureStatus;
|
||||
NicAddressStats* nicStats; // Owned by a NodeConnPool instance. Do not access
|
||||
// members without locking the NodeConnPool mutex.
|
||||
// Possibly NULL.
|
||||
IBVTimeoutConfig timeoutCfg;
|
||||
Mutex cmaMutex; // used to manage concurrency of cm_id and commContext
|
||||
// with __IBVSocket_cmaHandler
|
||||
};
|
||||
|
||||
|
||||
#else
|
||||
|
||||
|
||||
/**
 * Placeholder variant for builds without BEEGFS_RDMA.
 */
struct IBVSocket
{
   /* empty structs are not allowed, so until this kludge can go, add a dummy member
      (an unnamed zero-width bit-field) */
   unsigned:0;
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif /*OPENTK_IBVSOCKET_H_*/
|
||||
114
client_module/source/common/net/sock/ibv/No_IBVSocket.c
Normal file
114
client_module/source/common/net/sock/ibv/No_IBVSocket.c
Normal file
@@ -0,0 +1,114 @@
|
||||
#include "IBVSocket.h"
|
||||
|
||||
#ifndef BEEGFS_RDMA
|
||||
|
||||
#define no_ibvsocket_err() \
|
||||
printk_fhgfs(KERN_INFO, "%s:%d: You should never see this message\n", __func__, __LINE__)
|
||||
|
||||
/**
 * Stub for builds without RDMA support: logs a notice and always fails.
 *
 * @return false
 */
bool IBVSocket_init(IBVSocket* _this, struct in_addr srcIpAddr, NicAddressStats* nicStats)
{
   no_ibvsocket_err();

   return false;
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: there is nothing to release.
 */
void IBVSocket_uninit(IBVSocket* _this)
{
   // intentionally empty
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support.
 *
 * @return false, always (no message is logged by this query)
 */
bool IBVSocket_rdmaDevicesExist(void)
{
   return false;
}
|
||||
|
||||
bool IBVSocket_connectByIP(IBVSocket* _this, struct in_addr ipaddress, unsigned short port,
|
||||
IBVCommConfig* commCfg)
|
||||
{
|
||||
no_ibvsocket_err();
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: logs a notice and always fails.
 *
 * @return false
 */
bool IBVSocket_bindToAddr(IBVSocket* _this, struct in_addr ipAddr, unsigned short port)
{
   no_ibvsocket_err();

   return false;
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: logs a notice and always fails.
 *
 * @return false
 */
bool IBVSocket_listen(IBVSocket* _this)
{
   no_ibvsocket_err();

   return false;
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: logs a notice and always fails.
 *
 * @return false
 */
bool IBVSocket_shutdown(IBVSocket* _this)
{
   no_ibvsocket_err();

   return false;
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: logs a notice and always fails.
 *
 * @return -1
 */
ssize_t IBVSocket_recvT(IBVSocket* _this, struct iov_iter* iter, int flags, int timeoutMS)
{
   no_ibvsocket_err();

   return -1;
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: logs a notice and always fails.
 *
 * @return -1
 */
ssize_t IBVSocket_send(IBVSocket* _this, struct iov_iter* iter, int flags)
{
   no_ibvsocket_err();

   return -1;
}
|
||||
|
||||
/**
|
||||
* @return 0 on success, -1 on error
|
||||
*/
|
||||
int IBVSocket_checkConnection(IBVSocket* _this)
|
||||
{
|
||||
no_ibvsocket_err();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: logs a notice.
 *
 * @return a value with all bits set
 */
unsigned long IBVSocket_poll(IBVSocket* _this, short events, bool finishPoll)
{
   no_ibvsocket_err();

   return ~0UL; // same value as the int ~0 (-1) converted to unsigned long
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: logs a notice.
 *
 * @return a value with all bits set
 */
unsigned IBVSocket_getRkey(IBVSocket* _this)
{
   no_ibvsocket_err();

   return ~0U; // same value as the int ~0 (-1) converted to unsigned
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support.
 *
 * @return NULL, always (no message is logged)
 */
struct ib_device* IBVSocket_getDevice(IBVSocket* _this)
{
   return NULL;
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: all values are ignored.
 */
void IBVSocket_setTimeouts(IBVSocket* _this, int connectMS,
   int completionMS, int flowSendMS, int flowRecvMS, int pollMS)
{
   // intentionally empty
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: the value is ignored.
 */
void IBVSocket_setTypeOfService(IBVSocket* _this, int typeOfService)
{
   // intentionally empty
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support: the value is ignored.
 */
void IBVSocket_setConnectionFailureStatus(IBVSocket* _this, unsigned value)
{
   // intentionally empty
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support.
 *
 * @return an address with all bits of s_addr set (no message is logged)
 */
struct in_addr IBVSocket_getSrcIpAddr(IBVSocket* _this)
{
   struct in_addr noAddr;

   noAddr.s_addr = ~0;

   return noAddr;
}
|
||||
|
||||
/**
 * Stub for builds without RDMA support.
 *
 * @return NULL, always (no message is logged)
 */
NicAddressStats* IBVSocket_getNicStats(IBVSocket* _this)
{
   return NULL;
}
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user