New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

View File

@@ -0,0 +1,307 @@
#include <app/App.h>
#include <common/net/message/control/AckMsgEx.h>
#include <common/toolkit/Time.h>
#include "AckManager.h"
#define ACKMANAGER_MSGBUF_LEN 4096
/**
 * Initializes an already allocated AckManager: sets up the thread base object, the (empty) ack
 * queue with its mutex and condition, and the message serialization buffer.
 *
 * @param this the object to initialize
 * @param app application context; stored and used to look up the config
 */
void AckManager_init(AckManager* this, App* app)
{
   // base class first
   Thread_init(&this->thread, BEEGFS_THREAD_NAME_PREFIX_STR "AckMgr", __AckManager_run);

   // queue and its synchronization primitives
   PointerList_init(&this->ackQueue);
   Mutex_init(&this->ackQueueMutex);
   Condition_init(&this->ackQueueAddedCond);

   // application context
   this->app = app;
   this->cfg = App_getConfig(app);

   // serialization buffer (released via vfree in AckManager_uninit)
   // NOTE(review): the allocation result is not checked here
   this->ackMsgBuf = vmalloc(ACKMANAGER_MSGBUF_LEN);
}
/**
 * Allocates a new AckManager and initializes it via AckManager_init().
 *
 * @return the new object; release it with AckManager_destruct()
 */
struct AckManager* AckManager_construct(App* app)
{
   // NOTE(review): result is used unchecked; presumably os_kmalloc() does not return NULL - confirm
   struct AckManager* newMgr = (AckManager*)os_kmalloc(sizeof(*newMgr) );

   AckManager_init(newMgr, app);

   return newMgr;
}
/**
 * Releases everything owned by the AckManager (but not the object itself): all entries that are
 * still queued, the queue, the mutex, the message buffer and the thread base object.
 */
void AckManager_uninit(AckManager* this)
{
   PointerListIter queueIter;

   // free the entries that are still enqueued
   for(PointerListIter_init(&queueIter, &this->ackQueue);
       !PointerListIter_end(&queueIter);
       PointerListIter_next(&queueIter) )
   {
      AckQueueEntry* leftoverEntry = (AckQueueEntry*)PointerListIter_value(&queueIter);

      __AckManager_freeQueueEntry(this, leftoverEntry);
   }

   PointerList_uninit(&this->ackQueue);
   Mutex_uninit(&this->ackQueueMutex);

   vfree(this->ackMsgBuf);

   Thread_uninit( (Thread*)this);
}
/**
 * Counterpart of AckManager_construct(): uninitializes the object and then kfrees it.
 */
void AckManager_destruct(AckManager* this)
{
AckManager_uninit(this);
kfree(this);
}
/**
 * Main loop of the AckManager thread. Runs until the thread's self-terminate flag is set.
 *
 * Each round waits (with a timeout) for the "entry added" condition if the queue is empty and
 * then processes the queue.
 */
void _AckManager_requestLoop(AckManager* this)
{
   const int idleWaitMS = 2500;
   Thread* self = (Thread*)this;

   for( ; ; )
   {
      if(Thread_getSelfTerminate(self) )
         break;

      // wait for new queue entries (only if there is nothing to do right now)

      Mutex_lock(&this->ackQueueMutex); // L O C K

      if(PointerList_length(&this->ackQueue) == 0)
         Condition_timedwaitInterruptible(
            &this->ackQueueAddedCond, &this->ackQueueMutex, idleWaitMS);

      Mutex_unlock(&this->ackQueueMutex); // U N L O C K

      // process new queue entries (if any)
      __AckManager_processAckQueue(this);
   }
}
/**
 * Thread entry point (registered via Thread_init() in AckManager_init()): runs the request loop
 * and logs a message once the loop has ended.
 *
 * @param this the Thread base object of an AckManager
 */
void __AckManager_run(Thread* this)
{
   AckManager* ackMgr = (AckManager*)this;
   Logger* log = App_getLogger(ackMgr->app);

   _AckManager_requestLoop(ackMgr);

   Logger_log(log, 4, "AckManager (run)", "Component stopped.");
}
/**
 * Send ack for all enqueued entries.
 *
 * Walks the ack queue from its first entry. Every visited entry is freed and removed from the
 * queue at the end of its iteration, no matter whether the ack could be sent. If sending still
 * fails after the last retry, all following queue entries for the same metadata node are dropped
 * as well. The walk ends early once the thread's self-terminate flag is set.
 *
 * Locking: ackQueueMutex is held while the queue is read or modified and is released around the
 * socket acquire/send, so that other threads can append entries in the meantime.
 */
void __AckManager_processAckQueue(AckManager* this)
{
Logger* log = App_getLogger(this->app);
const char* logContext = "Queue processing";
const int maxNumCommRetries = 1; /* note: we really don't want to wait too long in this method
because time would run out for other acks then, so only a single retry allowed */
PointerListIter iter;
Mutex_lock(&this->ackQueueMutex); // L O C K
PointerListIter_init(&iter, &this->ackQueue);
while(!PointerListIter_end(&iter) && !Thread_getSelfTerminate( (Thread*)this) )
{
AckQueueEntry* currentAck = PointerListIter_value(&iter);
NodeStoreEx* metaNodes = App_getMetaNodes(this->app);
Node* node;
AckMsgEx msg;
unsigned msgLen;
bool serialRes;
NodeConnPool* connPool;
int currentRetryNum;
Socket* sock;
ssize_t sendRes = 0;
bool removeAllNextByNode = false;
// take a reference on the destination node (dropped at the "release" label)
node = NodeStoreEx_referenceNode(metaNodes, currentAck->metaNodeID);
if(unlikely(!node) )
{ // node not found in store
Logger_logFormatted(log, Log_DEBUG, logContext, "Metadata node no longer exists: %hu",
currentAck->metaNodeID.value);
goto remove;
}
connPool = Node_getConnPool(node);
// build the ack message and serialize it into the shared buffer
AckMsgEx_initFromValue(&msg, currentAck->ackID);
msgLen = NetMessage_getMsgLength( (NetMessage*)&msg);
serialRes = NetMessage_serialize( (NetMessage*)&msg, this->ackMsgBuf, ACKMANAGER_MSGBUF_LEN);
if(unlikely(!serialRes) )
{ // serialization failed
Logger_logFormatted(log, Log_CRITICAL, logContext,
"BUG(?): Unable to serialize ack msg for metadata node: %hu (ack: %s)",
currentAck->metaNodeID.value, currentAck->ackID);
goto release;
}
// first attempt plus at most maxNumCommRetries retries
for(currentRetryNum=0; currentRetryNum <= maxNumCommRetries; currentRetryNum++)
{
if(currentRetryNum == 1)
{ // inform user about retry (only on first retry to not spam the log)
Logger_logFormatted(log, Log_NOTICE, logContext,
"Retrying communication with metadata node: %hu", currentAck->metaNodeID.value);
}
// unlock, so that more entries can be added to the queue during remoting without waiting
Mutex_unlock(&this->ackQueueMutex); // U N L O C K
// connect & communicate
sock = NodeConnPool_acquireStreamSocket(connPool);
if(likely(sock) )
{ // send msg
sendRes = Socket_send_kernel(sock, this->ackMsgBuf, msgLen, 0);
if(unlikely(sendRes != (ssize_t) msgLen) )
{ // comm error => invalidate conn
NodeConnPool_invalidateStreamSocket(connPool, sock);
}
else
NodeConnPool_releaseStreamSocket(connPool, sock);
}
Mutex_lock(&this->ackQueueMutex); // R E L O C K
// check comm errors
if(unlikely(!sock || (sendRes != (ssize_t) msgLen) ) )
{ // no connection or communication error
Logger_logFormatted(log, Log_NOTICE, logContext,
"Communication with metadata node failed: %hu", currentAck->metaNodeID.value);
removeAllNextByNode = true; // (only effective if no more retries)
continue;
}
removeAllNextByNode = false;
break; // communication succeeded => we're done with this retry-loop
} // end of comm retry for-loop
if(removeAllNextByNode)
{ // comm with current node failed => remove all following entries with this nodeID
// note: only following entries, because current entry will be free'd below anyways.
PointerListIter iterNext = iter;
PointerListIter_next(&iterNext);
__AckManager_removeQueueEntriesByNode(this, currentAck->metaNodeID, iterNext);
}
// cleanup: drop the node reference taken above
release:
Node_put(node);
// cleanup: free the current entry and continue with the iterator returned by the remove call
remove:
__AckManager_freeQueueEntry(this, currentAck);
iter = PointerListIter_remove(&iter);
} // end of while(!list_end) loop
Mutex_unlock(&this->ackQueueMutex); // U N L O C K
}
/**
 * Frees the ackID string of the given entry and kfrees the ackEntry itself (but does not remove
 * it from the queue).
 *
 * @param this not accessed by this function
 * @param ackEntry the entry to free; must not be used by the caller afterwards
 */
void __AckManager_freeQueueEntry(AckManager* this, AckQueueEntry* ackEntry)
{
kfree(ackEntry->ackID);
kfree(ackEntry);
}
/**
 * Free and remove all queue entries for the given nodeID, starting at the given position.
 * (Typically used when communication with a certain node failed.)
 *
 * Note: This function does no locking itself; its only caller in this file holds ackQueueMutex.
 *
 * @param nodeID entries with this metadata node ID are removed
 * @param iter starting point for removal (entries before it are not inspected)
 */
void __AckManager_removeQueueEntriesByNode(AckManager* this, NumNodeID nodeID,
   PointerListIter iter)
{
   while(!PointerListIter_end(&iter) )
   {
      AckQueueEntry* candidate = PointerListIter_value(&iter);

      if(!NumNodeID_compare(&nodeID, &candidate->metaNodeID) )
      { // other node => keep the entry and move on
         PointerListIter_next(&iter);
         continue;
      }

      // nodeID matches => drop the entry and continue with the iterator returned by remove
      __AckManager_freeQueueEntry(this, candidate);
      iter = PointerListIter_remove(&iter);
   }
}
/**
 * Add an ack that should reliably (i.e. not via UDP) be transmitted to the given meta server.
 *
 * The new entry is appended to the tail of the queue and the AckManager thread is signaled.
 *
 * @param metaNodeID will be copied
 * @param ackID will be copied (the copy is owned by the queue entry)
 */
void AckManager_addAckToQueue(AckManager* this, NumNodeID metaNodeID, const char* ackID)
{
   AckQueueEntry* queuedAck = (AckQueueEntry*)os_kmalloc(sizeof(*queuedAck) );

   // fill in the new entry
   Time_init(&queuedAck->ageT);
   queuedAck->metaNodeID = metaNodeID;
   queuedAck->ackID = StringTk_strDup(ackID);

   // enqueue at the tail and wake up the AckManager thread

   Mutex_lock(&this->ackQueueMutex); // L O C K

   PointerList_append(&this->ackQueue, queuedAck);
   Condition_signal(&this->ackQueueAddedCond);

   Mutex_unlock(&this->ackQueueMutex); // U N L O C K
}
/**
 * @return number of entries currently in the ack queue (read while holding ackQueueMutex)
 */
size_t AckManager_getAckQueueSize(AckManager* this)
{
   size_t numQueued;

   Mutex_lock(&this->ackQueueMutex); // L O C K

   numQueued = PointerList_length(&this->ackQueue);

   Mutex_unlock(&this->ackQueueMutex); // U N L O C K

   return numQueued;
}

View File

@@ -0,0 +1,71 @@
#ifndef ACKMANAGER_H_
#define ACKMANAGER_H_
#include <app/config/Config.h>
#include <app/log/Logger.h>
#include <common/toolkit/list/PointerListIter.h>
#include <components/AckManager.h>
#include <common/threading/Thread.h>
#include <common/threading/Mutex.h>
#include <common/threading/Condition.h>
#include <common/net/message/NetMessage.h>
#include <nodes/NodeStoreEx.h>
/*
* note: AckEntry-queues management is integrated into AckManager, because it needs
* direct access to the queues via iterators and relies on special access patterns, e.g.
* it must be the only one removing entries from the queue (to keep iterators valid).
*/
// forward declarations...
struct AckQueueEntry;
typedef struct AckQueueEntry AckQueueEntry;
struct AckManager;
typedef struct AckManager AckManager;
extern void AckManager_init(AckManager* this, App* app);
extern AckManager* AckManager_construct(App* app);
extern void AckManager_uninit(AckManager* this);
extern void AckManager_destruct(AckManager* this);
extern void _AckManager_requestLoop(AckManager* this);
extern void __AckManager_run(Thread* this);
extern void __AckManager_processAckQueue(AckManager* this);
extern void __AckManager_freeQueueEntry(AckManager* this, AckQueueEntry* ackEntry);
extern void __AckManager_removeQueueEntriesByNode(AckManager* this, NumNodeID nodeID,
PointerListIter iter);
extern void AckManager_addAckToQueue(AckManager* this, NumNodeID metaNodeID, const char* ackID);
// getters & setters
extern size_t AckManager_getAckQueueSize(AckManager* this);
/**
 * One pending ack in the AckManager queue.
 */
struct AckQueueEntry
{
Time ageT; // time when this entry was created (to compute entry age)
NumNodeID metaNodeID; // ID of the metadata node that the ack will be sent to
const char* ackID; // ack ID string; a private copy that is kfree'd together with the entry
};
/**
 * Thread component that sends queued acks to metadata nodes over stream sockets.
 */
struct AckManager
{
Thread thread; // base class
App* app; // application context (as passed to AckManager_init)
Config* cfg; // result of App_getConfig(app) at init time
char* ackMsgBuf; // static buffer for message serialization (ACKMANAGER_MSGBUF_LEN bytes)
Mutex ackQueueMutex; // for ackQueue
Condition ackQueueAddedCond; // when entries are added to queue
PointerList ackQueue; /* remove from head, add to tail (important, because we rely on
validity of an iterator even when a new entry was added) */
};
#endif /* ACKMANAGER_H_ */

View File

@@ -0,0 +1,287 @@
#include "DatagramListener.h"
#include <common/toolkit/SocketTk.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/NetFilter.h>
#include <common/toolkit/Serialization.h>
#include <net/message/NetMessageFactory.h>
#include <linux/in.h>
/**
 * Thread entry point: allocates the send/recv buffers, runs the listen loop and logs a message
 * once the loop has ended.
 *
 * @param this the Thread base object of a DatagramListener
 */
void __DatagramListener_run(Thread* this)
{
   DatagramListener* listener = (DatagramListener*)this;
   Logger* log = App_getLogger(listener->app);

   __DatagramListener_initBuffers(listener);

   __DatagramListener_listenLoop(listener);

   Logger_log(log, 4, "DatagramListener (run)", "Component stopped.");
}
/**
 * Receive loop of the listener thread. Runs until the thread's self-terminate flag is set or
 * until the socket reports an error other than a timeout.
 *
 * Per datagram: timeouts and empty datagrams are skipped, datagrams that were sent by this host
 * itself are discarded, everything else is turned into a NetMessage. Messages with an invalid
 * header (invalid type, length mismatch or non-zero sequence fields) are only logged; all others
 * are passed to _DatagramListener_handleIncomingMsg().
 */
void __DatagramListener_listenLoop(DatagramListener* this)
{
   Logger* log = App_getLogger(this->app);
   const char* logContext = "DatagramListener (listen loop)";

   Thread* thisThread = (Thread*)this;

   fhgfs_sockaddr_in fromAddr;
   const int recvTimeoutMS = 2000;

   while(!Thread_getSelfTerminate(thisThread) )
   {
      NetMessage* msg;
      ssize_t recvRes;
      struct iov_iter *iter = STACK_ALLOC_BEEGFS_ITER_KVEC(this->recvBuf, DGRAMMGR_RECVBUF_SIZE, READ);

      recvRes = StandardSocket_recvfromT(this->udpSock, iter, 0, &fromAddr, recvTimeoutMS);

      if(recvRes == -ETIMEDOUT)
      { // timeout: nothing to worry about, just idle
         continue;
      }
      else
      if(recvRes == 0)
      {
         char* fromIP = SocketTk_ipaddrToStr(fromAddr.addr);

         /* fromAddr.port is in network byte order (it is compared against udpPortNetByteOrder
            elsewhere), so convert it for human-readable output */
         Logger_logFormatted(log, Log_NOTICE, logContext,
            "Received an empty datagram. IP: %s; port: %d",
            fromIP, ntohs(fromAddr.port) );

         kfree(fromIP);
         continue;
      }
      else
      if(unlikely(recvRes < 0) )
      { // error
         // (explicit cast, because ssize_t is not "long" on every architecture)
         Logger_logErrFormatted(log, logContext,
            "Encountered an unrecoverable socket error. ErrCode: %ld", (long)recvRes);

         break;
      }

      if(__DatagramListener_isDGramFromLocalhost(this, &fromAddr) )
      {
         //log.log(5, "Discarding DGram from localhost");
         continue;
      }

      msg = NetMessageFactory_createFromBuf(this->app, this->recvBuf, recvRes);

      if (msg->msgHeader.msgType == NETMSGTYPE_Invalid
         || msg->msgHeader.msgLength != recvRes
         || msg->msgHeader.msgSequence != 0
         || msg->msgHeader.msgSequenceDone != 0)
      {
         char* ipStr = SocketTk_ipaddrToStr(fromAddr.addr);

         Logger_logFormatted(this->app->logger, Log_NOTICE, logContext,
            "Received invalid message from peer %s", ipStr);

         kfree(ipStr);
      }
      else
      {
         _DatagramListener_handleIncomingMsg(this, &fromAddr, msg);
      }

      NETMESSAGE_FREE(msg);

   } // end of while loop
}
/**
 * Dispatches a received message by its type: acks are ignored, the message types that are
 * accepted via UDP are processed through the message's processIncoming handler, and all other
 * types are logged as invalid in this context.
 *
 * @param fromAddr sender address of the datagram
 * @param msg the received message; not freed here
 */
void _DatagramListener_handleIncomingMsg(DatagramListener* this,
   fhgfs_sockaddr_in* fromAddr, NetMessage* msg)
{
   const char* logContext = "DatagramListener (incoming msg)";
   Logger* log = App_getLogger(this->app);
   bool processed;
   char* senderIP;

   switch(NetMessage_getMsgType(msg) )
   {
      // An ack has historically been considered a valid message in this context, but the client
      // doesn't actually do anything with them. Ack messages are handled as a SimpleStringMsg which
      // uses the default NetMessage_processIncoming() handler which always returns false causing a
      // confusing "problem encountered" error to be logged if `processIncoming()` is called below.
      // Probably historically this wasn't an issue because clients didn't usually see acks, but at
      // least with the 8.0 mgmtd this can happen when the client and mgmtd are on the same node.
      case NETMSGTYPE_Ack:
         Logger_log(log, 4, logContext, "Ignoring incoming ack message");
         return;

      // valid messages within this context
      case NETMSGTYPE_HeartbeatRequest:
      case NETMSGTYPE_Heartbeat:
      case NETMSGTYPE_MapTargets:
      case NETMSGTYPE_RemoveNode:
      case NETMSGTYPE_LockGranted:
      case NETMSGTYPE_RefreshTargetStates:
      case NETMSGTYPE_SetMirrorBuddyGroup:
         processed = msg->ops->processIncoming(msg, this->app, fromAddr, (Socket*)this->udpSock,
            this->sendBuf, DGRAMMGR_SENDBUF_SIZE);
         if(processed)
            return;

         Logger_logFormatted(log, 2, logContext,
            "Problem encountered during handling of incoming message of type %d",
            NetMessage_getMsgType(msg));
         return;

      default:
         // valid fhgfs message, but not allowed within this context
         senderIP = SocketTk_ipaddrToStr(fromAddr->addr);

         Logger_logErrFormatted(log, logContext, "Received a message of type %d "
            "that is invalid within the current context from: %s",
            NetMessage_getMsgType(msg), senderIP);

         kfree(senderIP);
         return;
   }
}
/**
 * Creates the UDP socket, enables broadcast, optionally sets the receive buffer size from the
 * config and binds the socket to the given port.
 *
 * @param udpPort port to bind to (host byte order); also stored in network byte order in
 *    this->udpPortNetByteOrder
 * @return true on success; false on error, in which case the socket has been destroyed again and
 *    this->udpSock is NULL
 */
bool __DatagramListener_initSock(DatagramListener* this, unsigned short udpPort)
{
   Config* cfg = App_getConfig(this->app);
   Logger* log = App_getLogger(this->app);
   const char* logContext = "DatagramListener (init sock)";

   bool broadcastRes;
   bool bindRes;
   Socket* udpSockBase;
   int bufsize;

   this->udpPortNetByteOrder = htons(udpPort);

   this->udpSock = StandardSocket_constructUDP();

   if(!this->udpSock)
   {
      Logger_logErr(log, logContext, "Initialization of UDP socket failed");
      return false;
   }

   udpSockBase = &this->udpSock->pooledSocket.socket;

   // set some socket options

   broadcastRes = StandardSocket_setSoBroadcast(this->udpSock, true);
   if(!broadcastRes)
   {
      Logger_logErr(log, logContext, "Enabling broadcast for UDP socket failed.");
      goto err_valid;
   }

   // (best effort: the result of setting the receive buffer size is intentionally not checked)
   bufsize = Config_getConnUDPRcvBufSize(cfg);
   if (bufsize > 0)
      StandardSocket_setSoRcvBuf(this->udpSock, bufsize);

   // bind the socket

   bindRes = Socket_bind(udpSockBase, udpPort);
   if(!bindRes)
   {
      Logger_logErrFormatted(log, logContext, "Binding UDP socket to port %d failed.", udpPort);
      goto err_valid;
   }

   Logger_logFormatted(log, 3, logContext, "Listening for UDP datagrams: Port %d", udpPort);

   return true;

err_valid:
   Socket_virtualDestruct(udpSockBase);
   /* don't leave a dangling pointer behind: DatagramListener_uninit() would otherwise destruct
      the socket a second time */
   this->udpSock = NULL;

   return false;
}
/**
 * Allocates the receive and send buffers.
 *
 * Note: Delayed init of buffers (better for NUMA).
 * NOTE(review): the allocation results are not checked here.
 */
void __DatagramListener_initBuffers(DatagramListener* this)
{
   this->recvBuf = vmalloc(DGRAMMGR_RECVBUF_SIZE);
   this->sendBuf = vmalloc(DGRAMMGR_SENDBUF_SIZE);
}
/**
 * Sends the buffer to the UDP port of the given node on each of its interfaces that has the
 * standard NIC type and passes the net filter.
 *
 * The node's conn pool is locked while its NIC list is walked.
 */
static void DatagramListener_sendBufToNode_kernel(DatagramListener* this, Node* node,
   char* buf, size_t bufLen)
{
   NodeConnPool* nodeConnPool = Node_getConnPool(node);
   unsigned short nodePort = Node_getPortUDP(node);
   NicAddressList* nodeNics;
   NicAddressListIter nicIter;

   NodeConnPool_lock(nodeConnPool);

   nodeNics = NodeConnPool_getNicListLocked(nodeConnPool);
   NicAddressListIter_init(&nicIter, nodeNics);

   while(!NicAddressListIter_end(&nicIter) )
   {
      NicAddress* nic = NicAddressListIter_value(&nicIter);

      // (the filter is only consulted for interfaces of the standard type)
      if( (nic->nicType == NICADDRTYPE_STANDARD) &&
          NetFilter_isAllowed(this->netFilter, nic->ipAddr) )
         DatagramListener_sendtoIP_kernel(this, buf, bufLen, 0, nic->ipAddr, nodePort);

      NicAddressListIter_next(&nicIter);
   }

   NodeConnPool_unlock(nodeConnPool);
}
/**
 * Serializes the message into a temporary buffer and sends it to all available node interfaces.
 * The temporary buffer is freed before returning.
 */
void DatagramListener_sendMsgToNode(DatagramListener* this, Node* node, NetMessage* msg)
{
   char* serializedMsg = MessagingTk_createMsgBuf(msg);
   unsigned serializedLen = NetMessage_getMsgLength(msg);

   DatagramListener_sendBufToNode_kernel(this, node, serializedMsg, serializedLen);

   kfree(serializedMsg);
}
/**
 * Checks whether a datagram was sent by this listener's own host.
 *
 * A datagram only qualifies if its source port equals our own UDP port. In that case it counts
 * as local if the source address is the loopback address or one of the addresses in the local
 * node's NIC list.
 *
 * @param fromAddr sender address of the datagram
 * @return true if the datagram is considered to come from localhost
 */
bool __DatagramListener_isDGramFromLocalhost(DatagramListener* this,
   fhgfs_sockaddr_in* fromAddr)
{
   NodeConnPool* localConnPool;
   NicAddressList* localNics;
   NicAddressListIter nicIter;
   int numNics;
   int nicIdx = 0;
   bool isLocal = false;

   // different source port => can't be our own socket
   if(fromAddr->port != this->udpPortNetByteOrder)
      return false;

   // (inaddr_loopback is in host byte order)
   if(fromAddr->addr.s_addr == ntohl(INADDR_LOOPBACK) )
      return true;

   localConnPool = Node_getConnPool(this->localNode);

   NodeConnPool_lock(localConnPool);

   localNics = NodeConnPool_getNicListLocked(localConnPool);
   NicAddressListIter_init(&nicIter, localNics);
   numNics = NicAddressList_length(localNics);

   while(!isLocal && (nicIdx < numNics) )
   {
      NicAddress* nic = NicAddressListIter_value(&nicIter);

      if(nic->ipAddr.s_addr == fromAddr->addr.s_addr)
         isLocal = true;
      else
      {
         nicIdx++;
         NicAddressListIter_next(&nicIter);
      }
   }

   NodeConnPool_unlock(localConnPool);

   return isLocal;
}

View File

@@ -0,0 +1,156 @@
#ifndef DATAGRAMLISTENER_H_
#define DATAGRAMLISTENER_H_
#include <app/log/Logger.h>
#include <common/threading/Thread.h>
#include <common/net/sock/StandardSocket.h>
#include <common/net/message/NetMessage.h>
#include <nodes/NodeStoreEx.h>
#include <common/toolkit/NetFilter.h>
#include <common/Common.h>
#define DGRAMMGR_RECVBUF_SIZE 65536
#define DGRAMMGR_SENDBUF_SIZE DGRAMMGR_RECVBUF_SIZE
struct DatagramListener;
typedef struct DatagramListener DatagramListener;
static inline __must_check bool DatagramListener_init(DatagramListener* this, App* app,
Node* localNode, unsigned short udpPort);
static inline DatagramListener* DatagramListener_construct(App* app, Node* localNode,
unsigned short udpPort);
static inline void DatagramListener_uninit(DatagramListener* this);
static inline void DatagramListener_destruct(DatagramListener* this);
//extern ssize_t DatagramListener_broadcast(DatagramListener* this, void* buf, size_t len,
// int flags, unsigned short port); // no longer needed
extern void DatagramListener_sendMsgToNode(DatagramListener* this, Node* node, NetMessage* msg);
extern void __DatagramListener_run(Thread* this);
extern void __DatagramListener_listenLoop(DatagramListener* this);
extern void _DatagramListener_handleIncomingMsg(DatagramListener* this,
fhgfs_sockaddr_in* fromAddr, NetMessage* msg);
extern bool __DatagramListener_initSock(DatagramListener* this, unsigned short udpPort);
extern void __DatagramListener_initBuffers(DatagramListener* this);
extern bool __DatagramListener_isDGramFromLocalhost(DatagramListener* this,
fhgfs_sockaddr_in* fromAddr);
/**
 * Thread component that receives messages on a UDP socket and also offers helpers to send
 * datagrams through that socket.
 */
struct DatagramListener
{
Thread thread; // base class
App* app; // application context (as passed to DatagramListener_init)
char* recvBuf; // receive buffer; NULL until __DatagramListener_initBuffers() has run
Node* localNode; // node whose NIC list is used to detect datagrams from this host
NetFilter* netFilter; // result of App_getNetFilter(app) at init time
StandardSocket* udpSock; // the UDP socket (created and bound in __DatagramListener_initSock)
unsigned short udpPortNetByteOrder; // our UDP port in network byte order
char* sendBuf; // buffer handed to processIncoming(); NULL until initBuffers() has run
Mutex sendMutex; // held around each Socket_sendto_kernel() call on udpSock
};
/**
 * Initializes an already allocated DatagramListener and creates/binds its UDP socket.
 * The send/recv buffers are not allocated here (they start out as NULL).
 *
 * @param localNode stored for later detection of datagrams that were sent by this host
 * @param udpPort port to listen on (host byte order)
 * @return false if the socket could not be initialized (sendMutex is uninitialized again in
 *    that case)
 */
bool DatagramListener_init(DatagramListener* this, App* app, Node* localNode,
   unsigned short udpPort)
{
   Thread_init( (Thread*)this, BEEGFS_THREAD_NAME_PREFIX_STR "DGramLis", __DatagramListener_run);

   this->app = app;
   this->localNode = localNode;
   this->netFilter = App_getNetFilter(app);

   // nothing allocated yet
   this->udpSock = NULL;
   this->recvBuf = NULL;
   this->sendBuf = NULL;

   Mutex_init(&this->sendMutex);

   if(__DatagramListener_initSock(this, udpPort) )
      return true;

   // socket init failed => report and roll back
   Logger_logErr(App_getLogger(app), "DatagramListener_init", "Unable to initialize the socket");

   Mutex_uninit(&this->sendMutex);

   return false;
}
/**
 * Allocates and initializes a new DatagramListener.
 *
 * @return the new object, or NULL if the allocation or the initialization failed
 */
struct DatagramListener* DatagramListener_construct(App* app, Node* localNode,
   unsigned short udpPort)
{
   struct DatagramListener* listener = kmalloc(sizeof(*listener), GFP_NOFS);

   if(!listener)
      return NULL;

   if(!DatagramListener_init(listener, app, localNode, udpPort) )
   { // init failed => release the memory again
      kfree(listener);
      return NULL;
   }

   return listener;
}
/**
 * Releases everything owned by the listener (but not the object itself): the UDP socket (if
 * any), the send mutex, both buffers and the thread base object.
 */
void DatagramListener_uninit(DatagramListener* this)
{
   Socket* sockBase = (Socket*)this->udpSock;

   if(sockBase != NULL)
      Socket_virtualDestruct(sockBase);

   Mutex_uninit(&this->sendMutex);

   SAFE_VFREE(this->sendBuf);
   SAFE_VFREE(this->recvBuf);

   Thread_uninit( (Thread*)this);
}
/**
 * Counterpart of DatagramListener_construct(): uninitializes the object and then kfrees it.
 */
void DatagramListener_destruct(DatagramListener* this)
{
DatagramListener_uninit(this);
kfree(this);
}
/**
 * Sends a buffer to the given address through the listener's UDP socket.
 * The call to the socket is made while holding sendMutex.
 *
 * @param to destination address
 * @return the result of Socket_sendto_kernel()
 */
static inline ssize_t DatagramListener_sendto_kernel(DatagramListener* this, void* buf, size_t len, int flags,
   fhgfs_sockaddr_in* to)
{
   Socket* sockBase = &this->udpSock->pooledSocket.socket;
   ssize_t numSent;

   Mutex_lock(&this->sendMutex);

   numSent = Socket_sendto_kernel(sockBase, buf, len, flags, to);

   Mutex_unlock(&this->sendMutex);

   return numSent;
}
/**
 * Convenience wrapper around DatagramListener_sendto_kernel() for an IP address and a port.
 *
 * @param port destination port in host byte order (converted with htons() here)
 * @return the result of DatagramListener_sendto_kernel()
 */
static inline ssize_t DatagramListener_sendtoIP_kernel(DatagramListener* this, void *buf, size_t len, int flags,
   struct in_addr ipAddr, unsigned short port)
{
   fhgfs_sockaddr_in destAddr = {
      .addr = ipAddr,
      .port = htons(port),
   };

   return DatagramListener_sendto_kernel(this, buf, len, flags, &destAddr);
}
#endif /*DATAGRAMLISTENER_H_*/

View File

@@ -0,0 +1,131 @@
#include <app/App.h>
#include <filesystem/FhgfsOpsHelper.h>
#include <common/toolkit/Time.h>
//#include <filesystem/FhgfsInode.h>
#include "Flusher.h"
/**
 * Initializes an already allocated Flusher: sets up the thread base object (with
 * __Flusher_run as the thread function) and stores the app context.
 */
void Flusher_init(Flusher* this, App* app)
{
// call super constructor
Thread_init( (Thread*)this, BEEGFS_THREAD_NAME_PREFIX_STR "Flusher", __Flusher_run);
this->app = app;
}
/**
 * Allocates a new Flusher and initializes it via Flusher_init().
 *
 * @return the new object; release it with Flusher_destruct()
 */
struct Flusher* Flusher_construct(App* app)
{
   // NOTE(review): result is used unchecked; presumably os_kmalloc() does not return NULL - confirm
   struct Flusher* newFlusher = (Flusher*)os_kmalloc(sizeof(*newFlusher) );

   Flusher_init(newFlusher, app);

   return newFlusher;
}
/**
 * Uninitializes the thread base object (the Flusher owns no other resources).
 */
void Flusher_uninit(Flusher* this)
{
Thread_uninit( (Thread*)this);
}
/**
 * Counterpart of Flusher_construct(): uninitializes the object and then kfrees it.
 */
void Flusher_destruct(Flusher* this)
{
Flusher_uninit(this);
kfree(this);
}
/**
 * Main loop of the Flusher thread: flushes buffers each time the wait for a self-terminate
 * order returns false; ends as soon as that wait returns true.
 */
void _Flusher_requestLoop(Flusher* this)
{
   const int flushIntervalMS = 5*1000;
   Thread* self = (Thread*)this;

   for( ; ; )
   {
      if(_Thread_waitForSelfTerminateOrder(self, flushIntervalMS) )
         break;

      __Flusher_flushBuffers(this);
   }
}
/**
 * Thread entry point (registered via Thread_init() in Flusher_init()): runs the request loop
 * and logs a message once the loop has ended.
 *
 * @param this the Thread base object of a Flusher
 */
void __Flusher_run(Thread* this)
{
   Flusher* flusher = (Flusher*)this;
   Logger* log = App_getLogger(flusher->app);

   _Flusher_requestLoop(flusher);

   Logger_log(log, 4, "Flusher (run)", "Component stopped.");
}
/**
 * Walks through the inodes in the app's InodeRefStore and tries to flush the file cache buffer
 * of each one without waiting.
 *
 * Per inode, depending on the flush result:
 *  - success: the inode reference is dropped.
 *  - lock busy (INUSE): the inode is re-added to the store.
 *  - unrecoverable (i.e. non-communication) error and the file is no longer open: one final
 *    flush is attempted with the last argument set to true, then the reference is dropped.
 *  - communication error, or unrecoverable error while the file is still open: the inode is
 *    re-added to the store, so that the flush is retried later.
 *
 * The walk stops early when the thread's self-terminate flag is set.
 */
void __Flusher_flushBuffers(Flusher* this)
{
   const char* logContext = "flushBuffers (async)";

   InodeRefStore* refStore = App_getInodeRefStore(this->app);
   Thread* thisThread = (Thread*)this;

   struct inode* inode = InodeRefStore_getAndRemoveFirstInode(refStore);

   while(inode)
   {
      FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);

      FhgfsOpsErr flushRes = FhgfsOpsHelper_flushCacheNoWait(this->app, fhgfsInode, false);

      if(flushRes == FhgfsOpsErr_SUCCESS)
      { // flush succeeded => drop inode reference
         iput(inode);
      }
      else
      if(flushRes == FhgfsOpsErr_INUSE)
      { // file wasn't flushed, because the lock is busy right now => re-add and continue with next
         InodeRefStore_addOrPutInode(refStore, inode);
      }
      else
      if( (flushRes != FhgfsOpsErr_COMMUNICATION) && !FhgfsInode_getIsFileOpen(fhgfsInode) )
      { /* unrecoverable error and file is no longer open (so there is no chance that the user app
           can see an error code) so we have to discard the buffer to avoid retrying infinitely
           on this inode */

         /* note: both conditions must hold. A communication error may be only temporary, and a
            still open file gives the user app the chance to see the error code, so neither case
            may discard the buffer. */

         Logger* log = App_getLogger(this->app);

         FhgfsOpsErr finalFlushRes = FhgfsOpsHelper_flushCache(
            this->app, fhgfsInode, true);

         if(finalFlushRes != FhgfsOpsErr_SUCCESS)
         { // final flush attempt failed => notify user
            Logger_logFormatted(log, Log_DEBUG, logContext,
               "Discarded file buffer due to unrecoverable error on closed file: %s",
               FhgfsOpsErr_toErrString(finalFlushRes) );
         }

         iput(inode);
      }
      else
      { // comm error (or unrecoverable error, but file still open); flush failed => re-add inode

         /* note: decreasing ref count if inode exists in store is important in addOrPutInode(),
          * because we might race with a user app, e.g.:
          * 1) flusher gets a comm error and flusher thread sleeps before calling addOrPutInode()
          * 2) user app runs, flushes successfully, creates new cache buf and adds it to store
          * 3) flusher wakes up and calls addOrPutInode() */

         InodeRefStore_addOrPutInode(refStore, inode);
      }

      // check if user wants to unmount
      if(Thread_getSelfTerminate(thisThread) )
         break;

      // proceed to next inode

      /* note: it doesn't matter that the inode may no longer be valid here (after we dropped the
         reference), because we're not accessing the inode in the InodeRefStore methods. */

      inode = InodeRefStore_getAndRemoveNextInode(refStore, inode);
   }
}

View File

@@ -0,0 +1,39 @@
#ifndef FLUSHER_H_
#define FLUSHER_H_
#include <app/log/Logger.h>
#include <toolkit/InodeRefStore.h>
/*
* This component performs async cache flushes in buffered file cache mode.
*
* Note: Flushing needs to happen in a dedicated thread to avoid blocking other threads during
* retries while a server is unreachable.
*/
struct Flusher;
typedef struct Flusher Flusher;
extern void Flusher_init(Flusher* this, App* app);
extern Flusher* Flusher_construct(App* app);
extern void Flusher_uninit(Flusher* this);
extern void Flusher_destruct(Flusher* this);
extern void _Flusher_requestLoop(Flusher* this);
extern void __Flusher_run(Thread* this);
void __Flusher_flushBuffers(Flusher* this);
/**
 * Thread component for async cache flushes; its only own state is the app context.
 */
struct Flusher
{
Thread thread; // base class
App* app; // application context (as passed to Flusher_init)
};
#endif /* FLUSHER_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,209 @@
#ifndef INTERNODESYNCER_H_
#define INTERNODESYNCER_H_
#include <app/config/Config.h>
#include <app/log/Logger.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/nodes/TargetMapper.h>
#include <common/storage/FileEvent.h>
#include <common/toolkit/list/PointerListIter.h>
#include <common/toolkit/MetadataTk.h>
#include <common/threading/AtomicInt.h>
#include <common/threading/Thread.h>
#include <common/threading/Mutex.h>
#include <common/threading/Condition.h>
#include <common/net/message/NetMessage.h>
#include <common/nodes/TargetStateStore.h>
#include <components/DatagramListener.h>
#include <net/filesystem/RemotingIOInfo.h>
#include <nodes/NodeStoreEx.h>
/*
* note: Delayed...Entry-queues management is integrated into InternodeSyncer, because it needs
* direct access to the queues via iterators and relies on special access patterns, e.g.
* it must be the only one removing entries from the queue (to keep iterators valid).
*/
// forward declarations...
struct InternodeSyncer;
typedef struct InternodeSyncer InternodeSyncer;
struct DelayedCloseEntry;
typedef struct DelayedCloseEntry DelayedCloseEntry;
struct DelayedEntryUnlockEntry;
typedef struct DelayedEntryUnlockEntry DelayedEntryUnlockEntry;
struct DelayedRangeUnlockEntry;
typedef struct DelayedRangeUnlockEntry DelayedRangeUnlockEntry;
extern void InternodeSyncer_init(InternodeSyncer* this, App* app);
extern InternodeSyncer* InternodeSyncer_construct(App* app);
extern void InternodeSyncer_uninit(InternodeSyncer* this);
extern void InternodeSyncer_destruct(InternodeSyncer* this);
extern bool InternodeSyncer_waitForMgmtInit(InternodeSyncer* this, int timeoutMS);
extern void InternodeSyncer_delayedCloseAdd(InternodeSyncer* this, const EntryInfo* entryInfo,
const RemotingIOInfo* ioInfo, struct FileEvent* event);
extern void InternodeSyncer_delayedEntryUnlockAdd(InternodeSyncer* this,
const EntryInfo* entryInfo, const RemotingIOInfo* ioInfo, int64_t clientFD);
extern void InternodeSyncer_delayedRangeUnlockAdd(InternodeSyncer* this,
const EntryInfo* entryInfo, const RemotingIOInfo* ioInfo, int ownerPID);
extern void _InternodeSyncer_requestLoop(InternodeSyncer* this);
extern void __InternodeSyncer_run(Thread* this);
extern void __InternodeSyncer_signalMgmtInitDone(InternodeSyncer* this);
extern void __InternodeSyncer_mgmtInit(InternodeSyncer* this);
extern bool __InternodeSyncer_waitForMgmtHeartbeat(InternodeSyncer* this);
extern bool __InternodeSyncer_registerNode(InternodeSyncer* this);
extern void __InternodeSyncer_reregisterNode(InternodeSyncer* this);
extern bool __InternodeSyncer_unregisterNode(InternodeSyncer* this);
extern void __InternodeSyncer_downloadAndSyncNodes(InternodeSyncer* this);
extern void __InternodeSyncer_printSyncResults(InternodeSyncer* this, NodeType nodeType,
NumNodeIDList* addedNodes, NumNodeIDList* removedNodes);
extern void __InternodeSyncer_downloadAndSyncTargetMappings(InternodeSyncer* this);
extern void __InternodeSyncer_delayedCloseComm(InternodeSyncer* this);
extern void __InternodeSyncer_delayedEntryUnlockComm(InternodeSyncer* this);
extern void __InternodeSyncer_delayedRangeUnlockComm(InternodeSyncer* this);
extern void __InternodeSyncer_delayedClosePrepareRemoting(InternodeSyncer* this,
DelayedCloseEntry* closeEntry, EntryInfo** outEntryInfo, RemotingIOInfo* outIOInfo);
extern void __InternodeSyncer_delayedEntryUnlockPrepareRemoting(InternodeSyncer* this,
DelayedEntryUnlockEntry* closeEntry, EntryInfo** outEntryInfo, RemotingIOInfo* outIOInfo);
extern void __InternodeSyncer_delayedRangeUnlockPrepareRemoting(InternodeSyncer* this,
DelayedRangeUnlockEntry* closeEntry, EntryInfo** outEntryInfo, RemotingIOInfo* outIOInfo);
extern void __InternodeSyncer_delayedCloseFreeEntry(InternodeSyncer* this,
DelayedCloseEntry* closeEntry);
extern void __InternodeSyncer_delayedEntryUnlockFreeEntry(InternodeSyncer* this,
DelayedEntryUnlockEntry* closeEntry);
extern void __InternodeSyncer_delayedRangeUnlockFreeEntry(InternodeSyncer* this,
DelayedRangeUnlockEntry* closeEntry);
extern void __InternodeSyncer_dropIdleConns(InternodeSyncer* this);
extern unsigned __InternodeSyncer_dropIdleConnsByStore(InternodeSyncer* this, NodeStoreEx* nodes);
extern void __InternodeSyncer_updateTargetStatesAndBuddyGroups(InternodeSyncer* this,
NodeType nodeType);
extern bool __InternodeSyncer_checkNetwork(InternodeSyncer* this);
// getters & setters
static inline void InternodeSyncer_setForceTargetStatesUpdate(InternodeSyncer* this);
static inline bool InternodeSyncer_getAndResetForceTargetStatesUpdate(InternodeSyncer* this);
extern size_t InternodeSyncer_getDelayedCloseQueueSize(InternodeSyncer* this);
extern size_t InternodeSyncer_getDelayedEntryUnlockQueueSize(InternodeSyncer* this);
extern size_t InternodeSyncer_getDelayedRangeUnlockQueueSize(InternodeSyncer* this);
/**
 * Entry type for the delayedCloseQueue of the InternodeSyncer.
 * NOTE(review): presumably filled by InternodeSyncer_delayedCloseAdd(); the implementation is
 * not part of this header - confirm there.
 */
struct DelayedCloseEntry
{
Time ageT; // time when this entry was created (to compute entry age)
EntryInfo entryInfo;
// fields for RemotingIOInfo
const char* fileHandleID;
unsigned accessFlags; // OPENFILE_ACCESS_... flags
bool needsAppendLockCleanup;
AtomicInt maxUsedTargetIndex; // (used as a reference in ioInfo)
bool hasEvent; // presumably tells whether "event" below is valid - TODO confirm
struct FileEvent event;
};
/**
 * Entry type for the delayedEntryUnlockQueue of the InternodeSyncer.
 * NOTE(review): presumably filled by InternodeSyncer_delayedEntryUnlockAdd(); the
 * implementation is not part of this header - confirm there.
 */
struct DelayedEntryUnlockEntry
{
Time ageT; // time when this entry was created (to compute entry age)
EntryInfo entryInfo;
// fields for RemotingIOInfo
const char* fileHandleID;
int64_t clientFD;
};
/**
 * Entry type for the delayedRangeUnlockQueue of the InternodeSyncer.
 * NOTE(review): presumably filled by InternodeSyncer_delayedRangeUnlockAdd(); the
 * implementation is not part of this header - confirm there.
 */
struct DelayedRangeUnlockEntry
{
Time ageT; // time when this entry was created (to compute entry age)
EntryInfo entryInfo;
// fields for RemotingIOInfo
const char* fileHandleID;
int ownerPID;
};
/**
 * State of the InternodeSyncer thread component, including the three delayed-operation queues
 * (each with its own mutex).
 */
struct InternodeSyncer
{
Thread thread; // base class
App* app;
Config* cfg;
DatagramListener* dgramLis;
NodeStoreEx* mgmtNodes;
NodeStoreEx* metaNodes;
NodeStoreEx* storageNodes;
bool mgmtInitDone;
Mutex mgmtInitDoneMutex;
Condition mgmtInitDoneCond; // signaled when init is done (doesn't mean it was successful)
bool nodeRegistered; // true if the mgmt host ack'ed our heartbeat
bool forceTargetStatesUpdate; // force an update of target states
Time lastSuccessfulTargetStatesUpdateT;
unsigned targetOfflineTimeoutMS;
Mutex delayedCloseMutex; // for delayedCloseQueue
PointerList delayedCloseQueue; /* remove from head, add to tail (important, because we rely on
validity of an iterator even when a new entry was added) */
Mutex delayedEntryUnlockMutex; // for delayedEntryUnlockQueue
PointerList delayedEntryUnlockQueue; /* remove from head, add to tail (important, because we
rely on validity of an iterator even when a new entry was added) */
Mutex delayedRangeUnlockMutex; // for delayedRangeUnlockQueue
PointerList delayedRangeUnlockQueue; /* remove from head, add to tail (important, because we
rely on validity of an iterator even when a new entry was added) */
Mutex forceTargetStatesUpdateMutex; // for forceTargetStatesUpdate
};
/**
 * Sets the forceTargetStatesUpdate flag (while holding forceTargetStatesUpdateMutex).
 */
void InternodeSyncer_setForceTargetStatesUpdate(InternodeSyncer* this)
{
Mutex_lock(&this->forceTargetStatesUpdateMutex); // L O C K
this->forceTargetStatesUpdate = true;
Mutex_unlock(&this->forceTargetStatesUpdateMutex); // U N L O C K
}
/**
 * Reads the forceTargetStatesUpdate flag and clears it, both within one critical section of
 * forceTargetStatesUpdateMutex.
 *
 * @return the value of the flag before it was cleared
 */
bool InternodeSyncer_getAndResetForceTargetStatesUpdate(InternodeSyncer* this)
{
   bool wasForced;

   Mutex_lock(&this->forceTargetStatesUpdateMutex); // L O C K

   wasForced = this->forceTargetStatesUpdate;
   this->forceTargetStatesUpdate = false;

   Mutex_unlock(&this->forceTargetStatesUpdateMutex); // U N L O C K

   return wasForced;
}
#endif /*INTERNODESYNCER_H_*/

View File

@@ -0,0 +1,188 @@
#include <app/log/Logger.h>
#include <app/App.h>
#include <filesystem/FhgfsOpsPages.h>
#include "RWPagesWork.h"
// name that RWPagesWork_initworkQueue() passes to create_workqueue()
#define RWPagesWorkQueue_SUB_NAME BEEGFS_MODULE_NAME_STR "-rwPgWQ" // read-pages-work-queue
// the work queue that RWPagesWork_queue() submits to; NULL until RWPagesWork_initworkQueue()
// has assigned it
static struct workqueue_struct* rwPagesWorkQueue = NULL;
// file-local helpers (defined further down in this file)
static void RWPagesWork_processQueue(RWPagesWork* this);
static bool RWPagesWork_queue(RWPagesWork *this);
static FhgfsOpsErr _RWPagesWork_initReferenceFile(struct inode* inode, Fhgfs_RWType rwType,
FileHandleType* outHandleType, RemotingIOInfo* outIOInfo);
/**
 * Create the pages work queue and store it in rwPagesWorkQueue.
 *
 * @return true if create_workqueue() returned a queue, false if it returned NULL
 */
bool RWPagesWork_initworkQueue(void)
{
   rwPagesWorkQueue = create_workqueue(RWPagesWorkQueue_SUB_NAME);

   return (rwPagesWorkQueue != NULL);
}
/**
 * Flush and destroy the pages work queue (no-op if there is none).
 *
 * The global pointer is reset afterwards, so that the NULL checks in this function and in
 * RWPagesWork_flushWorkQueue() stay meaningful and a repeated call cannot touch the already
 * destroyed queue.
 */
void RWPagesWork_destroyWorkQueue(void)
{
   if (!rwPagesWorkQueue)
      return;

   flush_workqueue(rwPagesWorkQueue);
   destroy_workqueue(rwPagesWorkQueue);

   rwPagesWorkQueue = NULL; // don't leave a dangling pointer behind
}
/**
 * Flush the pages work queue via flush_workqueue(); does nothing if rwPagesWorkQueue is NULL.
 */
void RWPagesWork_flushWorkQueue(void)
{
   if (!rwPagesWorkQueue)
      return; // no queue => nothing to flush

   flush_workqueue(rwPagesWorkQueue);
}
/**
 * Submit the embedded kernel work item of the given RWPagesWork to rwPagesWorkQueue.
 *
 * @return the result of queue_work()
 */
bool RWPagesWork_queue(RWPagesWork *this)
{
   struct work_struct* workItem = &this->kernelWork;

   return queue_work(rwPagesWorkQueue, workItem);
}
/**
 * Initialize a work item: store the arguments, reference the file handle (which also fills
 * this->handleType and this->ioInfo) and finally set up the kernel work with
 * RWPagesWork_process() as its callback.
 *
 * Note: if referencing the file fails, the kernel work is not initialized.
 *
 * @return false if the file could not be referenced, true otherwise
 */
bool RWPagesWork_init(RWPagesWork* this, App* app, struct inode* inode,
   FhgfsChunkPageVec *pageVec, Fhgfs_RWType rwType)
{
   FhgfsOpsErr refErr;

   this->rwType = rwType;
   this->pageVec = pageVec;
   this->inode = inode;
   this->app = app;

   refErr = _RWPagesWork_initReferenceFile(inode, rwType, &this->handleType, &this->ioInfo);
   if (unlikely(refErr != FhgfsOpsErr_SUCCESS) )
      return false;

   INIT_WORK(&this->kernelWork, RWPagesWork_process);

   return true;
}
/**
 * Init helper: take a reference on the file handle of the given inode and fetch the matching
 * IO info.
 *
 * Note: FhgfsOpsPages_readpages or FhgfsOpsPages_writepages are supposed to have referenced the
 * file already, so referencing is not expected to fail here; a failure is reported as a bug
 * together with a stack dump.
 *
 * @param outHandleType handle type as set by FhgfsInode_referenceHandle()
 * @param outIOInfo only filled on success
 * @return result of FhgfsInode_referenceHandle()
 */
FhgfsOpsErr _RWPagesWork_initReferenceFile(struct inode* inode, Fhgfs_RWType rwType,
   FileHandleType* outHandleType, RemotingIOInfo* outIOInfo)
{
   FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
   FhgfsOpsErr refErr;
   int openFlags;

   if (rwType == BEEGFS_RWTYPE_WRITE)
      openFlags = OPENFILE_ACCESS_WRITE;
   else
      openFlags = OPENFILE_ACCESS_READ;

   refErr = FhgfsInode_referenceHandle(fhgfsInode, NULL, openFlags, true, NULL,
      outHandleType, NULL);

   if (unlikely(refErr != FhgfsOpsErr_SUCCESS) )
   { // failure (unexpected, see note above)
      printk_fhgfs(KERN_INFO, "Bug: file not referenced");
      dump_stack();

      return refErr;
   }

   // success: derive the flags from the handle type that was actually referenced (might have
   // changed to OPENFILE_ACCESS_READWRITE)
   openFlags = FhgfsInode_handleTypeToOpenFlags(*outHandleType);

   FhgfsInode_getRefIOInfo(fhgfsInode, *outHandleType, openFlags, outIOInfo);

   return refErr;
}
/**
 * Work callback (installed through INIT_WORK() in RWPagesWork_init() ): processes the
 * RWPagesWork that the given kernel work belongs to.
 *
 * Note: the cast relies on kernelWork being the first member of struct RWPagesWork.
 */
void RWPagesWork_process(struct work_struct* work)
{
   RWPagesWork_processQueue( (RWPagesWork*)work);
}
/**
 * Needed for old INIT_WORK() with 3 parameters (before 2.6.20)
 *
 * @param data the struct work_struct* of the work item, passed as an untyped pointer
 */
void RWPagesWork_oldProcess(void* data)
{
   struct work_struct* work = (struct work_struct*) data;

   // plain call: a void function must not "return" an expression
   RWPagesWork_process(work);
}
/**
 * Construct a RWPagesWork for the given page vector and submit it to the work queue.
 *
 * If submitting fails, the read/write error handler is run on pageVec and the work item is
 * destructed right here (RWPagesWork_destruct() also destroys pageVec).
 *
 * @return false if the work item could not be constructed; true otherwise (this includes the
 *    case that submitting the constructed item failed)
 */
bool RWPagesWork_createQueue(App* app, FhgfsChunkPageVec* pageVec, struct inode* inode,
   Fhgfs_RWType rwType)
{
   Logger* log = App_getLogger(app);
   const char* logContext = __func__;

   RWPagesWork* work;

   work = RWPagesWork_construct(app, inode, pageVec, rwType);
   if (unlikely(!work) )
   { // constructing the work item failed
      Logger_logErr(log, logContext, "Failed to create work queue.");

      return false;
   }

   if (!RWPagesWork_queue(work) )
   { // construct succeeded, but the item could not be queued => fail the pages and clean up
      Logger_logErr(log, logContext, "Queueing the RWPagesWork failed.");

      if (rwType == BEEGFS_RWTYPE_READ)
         FhgfsChunkPageVec_iterateAllHandleReadErr(pageVec);
      else
         FhgfsChunkPageVec_iterateAllHandleWritePages(pageVec, -EIO);

      RWPagesWork_destruct(work);
   }

   return true;
}
/**
 * Process a request from the queue: run the remote read/write for the page vector of this work
 * item, log the result (debug log only) and destruct the work item.
 *
 * Note: the work item is destructed (and freed) at the end, so it must not be used after this
 * function was called.
 */
void RWPagesWork_processQueue(RWPagesWork* this)
{
   App* app = this->app;
   Logger* log = App_getLogger(app);

   ssize_t rwRes;

   rwRes = FhgfsOpsRemoting_rwChunkPageVec(this->pageVec, &this->ioInfo, this->rwType);

   if (unlikely(rwRes < 0) )
   { // braces: the log macro is the only statement of this branch
      LOG_DEBUG_FORMATTED(log, 1, __func__, "error: %s", FhgfsOpsErr_toErrString(-rwRes) );
   }
   else
   {
      // rwRes is a (signed) ssize_t, so it needs %zd
      LOG_DEBUG_FORMATTED(log, 5, __func__, "rwRes: %zd", rwRes);
      IGNORE_UNUSED_VARIABLE(log);
   }

   RWPagesWork_destruct(this);
}

View File

@@ -0,0 +1,104 @@
#ifndef RWPAGESWORK_H_
#define RWPAGESWORK_H_
#include <common/Common.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <toolkit/FhgfsPage.h>
#include <toolkit/FhgfsChunkPageVec.h>
#include <net/filesystem/RemotingIOInfo.h>
#include <filesystem/FhgfsInode.h>
#include <filesystem/FhgfsOpsFile.h>
#include <filesystem/FsFileInfo.h>
#include <common/threading/AtomicInt.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <linux/fs.h>
struct RWPagesWork;
typedef struct RWPagesWork RWPagesWork;
// constructs a work item for the given page vector and submits it to the work queue
extern bool RWPagesWork_createQueue(App* app, FhgfsChunkPageVec* pageVec, struct inode* inode,
Fhgfs_RWType rwType);
bool RWPagesWork_init(RWPagesWork* this, App* app, struct inode* inode,
FhgfsChunkPageVec *pageVec, Fhgfs_RWType rwType);
// inliners (defined at the end of this header)
static inline RWPagesWork* RWPagesWork_construct(App* app, struct inode* inode,
FhgfsChunkPageVec *pageVec, Fhgfs_RWType rwType);
static inline void RWPagesWork_uninit(RWPagesWork* this);
static inline void RWPagesWork_destruct(RWPagesWork* this);
// management of the work queue that the work items are submitted to
extern bool RWPagesWork_initworkQueue(void);
extern void RWPagesWork_destroyWorkQueue(void);
extern void RWPagesWork_flushWorkQueue(void);
// virtual functions (RWPagesWork_process is the callback that RWPagesWork_init passes to
// INIT_WORK() )
extern void RWPagesWork_process(struct work_struct* work);
extern void RWPagesWork_oldProcess(void* data);
/**
 * A single page read/write request that is submitted to the pages work queue.
 */
struct RWPagesWork
{
// note: RWPagesWork_process() casts the work_struct pointer back to RWPagesWork*, which relies on
// this being the first member
struct work_struct kernelWork;
App* app;
FhgfsChunkPageVec* pageVec; // destroyed by RWPagesWork_uninit()
RemotingIOInfo ioInfo; // filled by RWPagesWork_init() when the file handle is referenced
FileHandleType handleType; // set by RWPagesWork_init(), used by RWPagesWork_uninit() for release
Fhgfs_RWType rwType;
struct inode* inode;
};
/**
 * RWPagesWork Constructor
 *
 * Allocates a work item and initializes it through RWPagesWork_init(). If the init fails, the
 * read/write error handler is run on pageVec and the item is destructed again (which also
 * destroys pageVec). If the allocation fails, pageVec is not touched.
 *
 * NOTE(review): the init failure path goes through RWPagesWork_destruct() and thus also calls
 * FhgfsInode_releaseHandle() with this->handleType, although referencing the handle has just
 * failed - confirm that this is intended.
 *
 * @return the new work item or NULL if allocation or init failed
 */
struct RWPagesWork* RWPagesWork_construct(App* app, struct inode* inode,
   FhgfsChunkPageVec * pageVec, Fhgfs_RWType rwType)
{
   bool initOk;
   struct RWPagesWork* this;

   this = (RWPagesWork*)os_kmalloc(sizeof(*this) );
   if (unlikely(!this) )
      return NULL;

   initOk = RWPagesWork_init(this, app, inode, pageVec, rwType);
   if (unlikely(!initOk) )
   { // fail the pages, then undo what has been set up so far and free the item
      if (rwType != BEEGFS_RWTYPE_READ)
         FhgfsChunkPageVec_iterateAllHandleWritePages(pageVec, -EIO);
      else
         FhgfsChunkPageVec_iterateAllHandleReadErr(pageVec);

      RWPagesWork_destruct(this);

      return NULL;
   }

   return this;
}
/**
 * Uninitialize worker data: destroy the page vector and release the file handle that is
 * identified by this->handleType.
 */
void RWPagesWork_uninit(RWPagesWork* this)
{
   FhgfsInode* beegfsInode = BEEGFS_INODE(this->inode);

   FhgfsChunkPageVec_destroy(this->pageVec);

   FhgfsInode_releaseHandle(beegfsInode, this->handleType, NULL);
}
/**
 * RWPagesWork Destructor: uninitializes the work item and frees its memory.
 */
void RWPagesWork_destruct(RWPagesWork* this)
{
RWPagesWork_uninit(this);
kfree(this);
}
#endif /* RWPAGESWORK_H_ */