2025-08-10 01:34:16 +02:00

236 lines
7.3 KiB
C

#include <common/toolkit/SocketTk.h>
#include <common/net/sock/StandardSocket.h>
#include <common/net/sock/RDMASocket.h>
#include <common/Common.h>
#include <common/toolkit/TimeTk.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/poll.h>
#include <linux/net.h>
#include <linux/socket.h>
/* Dummy file pointer that SocketTk_poll() hands to the poll method of standard sockets.
 * Opened by SocketTk_initOnce() and closed by SocketTk_uninitOnce(). */
struct file* SocketTkDummyFilp = NULL;
/**
 * Opens "/dev/null" once and keeps the resulting file pointer in SocketTkDummyFilp, which is
 * needed as a placeholder file when polling sockets.
 *
 * Note: pair every call with SocketTk_uninitOnce().
 *
 * @return true if the dummy file was opened, false otherwise (a warning is logged)
 */
bool SocketTk_initOnce(void)
{
   struct file* dummyFilp = filp_open("/dev/null", O_RDONLY, 0);

   // the result is published even on failure; the uninit routine checks for error pointers
   SocketTkDummyFilp = dummyFilp;

   if(!IS_ERR(dummyFilp) )
      return true;

   printk_fhgfs(KERN_WARNING, "Failed to open the dummy filp for polling\n");

   return false;
}
/**
 * Counterpart of SocketTk_initOnce(): closes the dummy file pointer if it refers to an open file.
 */
void SocketTk_uninitOnce(void)
{
   if(!SocketTkDummyFilp)
      return; // still holds its initial NULL value => nothing to close

   if(IS_ERR(SocketTkDummyFilp) )
      return; // opening failed during init => nothing to close

   filp_close(SocketTkDummyFilp, NULL);
}
/**
* Synchronous I/O multiplexing for standard and RDMA sockets.
* Note: comparable to userspace poll()
*
* Every socket in the list is asked for pending events. If no socket reports an interesting event
* and the timeout has not expired yet, the calling task sleeps and all sockets are checked again
* after wake-up. For each socket with an interesting event, the event mask is stored in the
* socket's poll.revents.
*
* @param state list of sockets (state->list) and their event wishes (poll._events of each socket)
* @param timeoutMS max wait time, converted to jiffies via TimeTk_msToJiffiesSchedulable(); if the
* converted value is 0, the socks are checked exactly once and the task does not sleep
* @return number of socks for which interesting events are available or negative linux error code
* (the error recorded in the poll wait table; only returned if no events were found).
* May be 0 before the timeout expired if a fatal signal is pending.
* Note: the remaining timeout is not reported back to the caller.
*/
int SocketTk_poll(PollState* state, int timeoutMS)
{
struct poll_wqueues stdTable; // wait table for the standard socks
poll_table* stdWait = NULL; // value NULL means "don't register for waiting"
int table_err = 0; // copy of stdTable.error if registering for waiting failed
int numSocksWithREvents = 0; // the return value
Socket* socket;
long __timeout = TimeTk_msToJiffiesSchedulable(timeoutMS); // remaining wait time
/* 4.19 (vanilla, not stable) had a bug in the sock_poll_wait signature. rhel 4.18 backports
* this bug. 4.19.1 fixes it again.
* => refuse to build if sock_poll_wait has the two-argument (struct file*, poll_table*) form. */
BUILD_BUG_ON(__builtin_types_compatible_p(
__typeof__(&sock_poll_wait),
void (*)(struct file*, poll_table*)));
poll_initwait(&stdTable);
// only register for waiting if we are allowed to sleep at all
if(__timeout)
stdWait = &stdTable.pt;
// 1st loop: register the socks that we're waiting for and wait blocking
// if no data is available yet
// 2nd loop (after event or timeout): check all socks for available data
// note: std socks return all events, even those we haven't been waiting for
// 3rd and further loops: in case an uninteresting event occurred
for( ; ; )
{
numSocksWithREvents = 0; // (must be inside the loop to be consistent)
/* wait INTERRUPTIBLE if no signal is pending, otherwise the wait contributes to load.
* (if a signal is already pending, KILLABLE is used instead.)
* users of this function should be migrated to use socket callbacks instead.
* note: the task state is set before the socks are checked and is reset to TASK_RUNNING
* after the loop. */
set_current_state(signal_pending(current) ? TASK_KILLABLE : TASK_INTERRUPTIBLE);
// for each sock: ask for available data and register waitqueue
list_for_each_entry(socket, &state->list, poll._list)
{
if(Socket_getSockType(socket) == NICADDRTYPE_RDMA)
{ // RDMA socket
struct RDMASocket* currentRDMASock = (RDMASocket*)socket;
// true if we will not sleep after this pass (events already found or no timeout left)
// NOTE(review): the exact meaning of this flag is defined by RDMASocket_poll() - confirm there
bool finishPoll = (numSocksWithREvents || !__timeout);
unsigned long mask = RDMASocket_poll(
currentRDMASock, socket->poll._events, finishPoll);
if(mask)
{ // interesting event occurred
socket->poll.revents = mask; // save event mask as revents
numSocksWithREvents++;
}
}
else
{ // Standard socket
struct socket* currentRawSock =
StandardSocket_getRawSock( (StandardSocket*)socket);
// no need to register for waiting if an earlier sock already has events (we won't sleep)
poll_table* currentStdWait = numSocksWithREvents ? NULL : stdWait;
unsigned long mask = (*currentRawSock->ops->poll)(
SocketTkDummyFilp, currentRawSock, currentStdWait);
// error/hangup/invalid events are always interesting, even if the caller didn't ask for them
if(mask & (socket->poll._events | POLLERR | POLLHUP | POLLNVAL) )
{ // interesting event occurred
socket->poll.revents = mask; // save the complete (unfiltered) event mask as revents
numSocksWithREvents++;
}
//cond_resched(); // (commented out for latency reasons)
}
} // end of for_each_socket loop
stdWait = NULL; // don't register standard socks for waiting in following loops
// skip the waiting if we already found something, if no wait time is left or if the task has
// a fatal signal pending
if (numSocksWithREvents || !__timeout || fatal_signal_pending(current))
break;
// skip the waiting if we have an error
if(unlikely(stdTable.error) )
{
table_err = stdTable.error;
break;
}
// wait (and reduce remaining timeout)
__timeout = schedule_timeout(__timeout);
} // end of sleep loop
__set_current_state(TASK_RUNNING);
// cleanup loop for RDMA socks: one more poll call per RDMA sock with the finish flag set
// NOTE(review): presumably this makes the sock deregister from waiting - confirm in RDMASocket_poll()
list_for_each_entry(socket, &state->list, poll._list)
{
if(Socket_getSockType(socket) == NICADDRTYPE_RDMA)
{
struct RDMASocket* currentRDMASock = (RDMASocket*)socket;
RDMASocket_poll(currentRDMASock, socket->poll._events, true);
}
}
// cleanup for standard socks
poll_freewait(&stdTable);
//printk_fhgfs(KERN_INFO, "%s:%d: return %d\n", __func__, __LINE__,
// table_err ? table_err : numSocksWithREvents); // debug in
// an error from the wait table takes precedence (it is only set if no events were found)
return table_err ? table_err : numSocksWithREvents;
}
/**
 * Parses an IPv4 address string with in4_pton().
 *
 * @param hostAddr zero-terminated address string
 * @param outIPAddr receives the parsed address; set to INADDR_NONE if parsing failed
 * @return true if the string was parsed successfully, false otherwise
 */
bool SocketTk_getHostByAddrStr(const char* hostAddr, struct in_addr* outIPAddr)
{
   int parseRes = in4_pton(hostAddr, strlen(hostAddr), (u8 *)outIPAddr, -1, NULL);

   if(unlikely(!parseRes) )
   { // not a valid address string
      outIPAddr->s_addr = INADDR_NONE;
   }

   return parseRes ? true : false;
}
/**
* Note: Better use SocketTk_getHostByAddrStr(), which can also check for errors on recent kernels.
*
* @return INADDR_NONE if an error was detected (recent kernels only)
*/
struct in_addr SocketTk_in_aton(const char* hostAddr)
{
struct in_addr retVal;
// Note: retVal INADDR_NONE will be set by getHostByAddrStr()
SocketTk_getHostByAddrStr(hostAddr, &retVal);
return retVal;
}
/**
 * Writes the textual form of an IPv4 address ("%pI4" format) to a caller-provided buffer.
 *
 * The result is truncated if the buffer is too small and is zero-terminated whenever
 * ipStrLen is greater than 0.
 *
 * @param ipaddress the address to print
 * @param ipStr the buffer to which <IP> should be written
 * @param ipStrLen size of ipStr in bytes (including space for the terminating zero); if 0,
 * nothing is written
 */
void SocketTk_ipaddrToStrNoAlloc(struct in_addr ipaddress, char* ipStr, size_t ipStrLen)
{
   /* without any space we must not touch the buffer at all (terminating at index
    * ipStrLen-1 would write in front of the buffer) */
   if(!ipStrLen)
      return;

   // snprintf never writes more than ipStrLen bytes and zero-terminates even when truncating
   snprintf(ipStr, ipStrLen, "%pI4", &ipaddress);
}
/**
 * Allocating variant of SocketTk_ipaddrToStrNoAlloc().
 *
 * @return string is kalloced and needs to be kfreed by the caller; NULL if the allocation failed
 */
char* SocketTk_ipaddrToStr(struct in_addr ipaddress)
{
   char* addrStr = os_kmalloc(SOCKETTK_IPADDRSTR_LEN);

   if(!addrStr)
      return NULL; // out of memory => nothing to format

   SocketTk_ipaddrToStrNoAlloc(ipaddress, addrStr, SOCKETTK_IPADDRSTR_LEN);

   return addrStr;
}
/**
 * Writes "<IP>:<port>" for the given IPv4 endpoint to a caller-provided buffer.
 *
 * The result is truncated if the buffer is too small and is zero-terminated whenever bufLen
 * is greater than 0.
 *
 * @param buf the buffer to which <IP>:<port> should be written
 * @param bufLen size of buf in bytes (including space for the terminating zero); if 0, nothing
 * is written
 * @param ipaddress the address to print
 * @param port the port to print (as decimal number)
 */
void SocketTk_endpointAddrToStrNoAlloc(char* buf, size_t bufLen, struct in_addr ipaddress,
   unsigned short port)
{
   /* without any space we must not touch the buffer at all (terminating at index bufLen-1
    * would write in front of the buffer) */
   if(!bufLen)
      return;

   // snprintf never writes more than bufLen bytes and zero-terminates even when truncating
   snprintf(buf, bufLen, "%pI4:%u", &ipaddress, (unsigned)port);
}
/**
 * Allocating variant of SocketTk_endpointAddrToStrNoAlloc().
 *
 * @return string is kalloced and needs to be kfreed by the caller; NULL if the allocation failed
 */
char* SocketTk_endpointAddrToStr(struct in_addr ipaddress, unsigned short port)
{
   char* result = os_kmalloc(SOCKETTK_ENDPOINTSTR_LEN);

   if(!result)
      return NULL; // out of memory => nothing to format

   SocketTk_endpointAddrToStrNoAlloc(result, SOCKETTK_ENDPOINTSTR_LEN, ipaddress, port);

   return result;
}