New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

369
meta/CMakeLists.txt Normal file
View File

@@ -0,0 +1,369 @@
# Make headers under meta/source reachable via paths relative to that directory.
include_directories(source)
# Static library holding the metadata service sources; it is linked into both
# the beegfs-meta executable and the test-meta test binary.
#
# source/program/Main.cpp is intentionally NOT part of this library: it defines
# the program entry point and is compiled directly into the beegfs-meta
# executable. Keeping it in the archive would let the linker resolve main()
# from here when linking test-meta (meta precedes gtest_main on the link line),
# so the test binary would not use the GoogleTest entry point. This also matches
# meta/build/Makefile, which filters Main.cpp out of the library sources.
#
# Each source file is listed exactly once.
add_library(
  meta STATIC
  ./source/toolkit/StorageTkEx.cpp
  ./source/toolkit/BuddyCommTk.cpp
  ./source/toolkit/BuddyCommTk.h
  ./source/toolkit/XAttrTk.h
  ./source/toolkit/XAttrTk.cpp
  ./source/toolkit/StorageTkEx.h
  ./source/net/message/mon/RequestMetaDataMsgEx.h
  ./source/net/message/mon/RequestMetaDataMsgEx.cpp
  ./source/net/message/control/AckMsgEx.h
  ./source/net/message/control/SetChannelDirectMsgEx.cpp
  ./source/net/message/control/AckMsgEx.cpp
  ./source/net/message/control/SetChannelDirectMsgEx.h
  ./source/net/message/NetMessageFactory.h
  ./source/net/message/session/opening/OpenFileMsgEx.cpp
  ./source/net/message/session/opening/CloseFileMsgEx.h
  ./source/net/message/session/opening/OpenFileMsgEx.h
  ./source/net/message/session/opening/CloseFileMsgEx.cpp
  ./source/net/message/session/BumpFileVersionMsgEx.h
  ./source/net/message/session/GetFileVersionMsgEx.h
  ./source/net/message/session/locking/FLockEntryMsgEx.h
  ./source/net/message/session/locking/FLockRangeMsgEx.h
  ./source/net/message/session/locking/FLockRangeMsgEx.cpp
  ./source/net/message/session/locking/FLockAppendMsgEx.h
  ./source/net/message/session/locking/FLockEntryMsgEx.cpp
  ./source/net/message/session/locking/FLockAppendMsgEx.cpp
  ./source/net/message/session/GetFileVersionMsgEx.cpp
  ./source/net/message/session/BumpFileVersionMsgEx.cpp
  ./source/net/message/session/AckNotifyMsgEx.h
  ./source/net/message/NetMessageFactory.cpp
  ./source/net/message/nodes/SetTargetConsistencyStatesMsgEx.cpp
  ./source/net/message/nodes/GetNodesMsgEx.h
  ./source/net/message/nodes/GetNodeCapacityPoolsMsgEx.cpp
  ./source/net/message/nodes/RefreshCapacityPoolsMsgEx.cpp
  ./source/net/message/nodes/GetTargetMappingsMsgEx.h
  ./source/net/message/nodes/GetClientStatsMsgEx.h
  ./source/net/message/nodes/MapTargetsMsgEx.cpp
  ./source/net/message/nodes/RefreshTargetStatesMsgEx.cpp
  ./source/net/message/nodes/storagepools/RefreshStoragePoolsMsgEx.h
  ./source/net/message/nodes/storagepools/RefreshStoragePoolsMsgEx.cpp
  ./source/net/message/nodes/HeartbeatMsgEx.cpp
  ./source/net/message/nodes/HeartbeatRequestMsgEx.h
  ./source/net/message/nodes/GetClientStatsMsgEx.cpp
  ./source/net/message/nodes/GetTargetMappingsMsgEx.cpp
  ./source/net/message/nodes/PublishCapacitiesMsgEx.h
  ./source/net/message/nodes/GenericDebugMsgEx.cpp
  ./source/net/message/nodes/GetNodesMsgEx.cpp
  ./source/net/message/nodes/SetMirrorBuddyGroupMsgEx.h
  ./source/net/message/nodes/PublishCapacitiesMsgEx.cpp
  ./source/net/message/nodes/RemoveNodeMsgEx.cpp
  ./source/net/message/nodes/SetTargetConsistencyStatesMsgEx.h
  ./source/net/message/nodes/HeartbeatRequestMsgEx.cpp
  ./source/net/message/nodes/MapTargetsMsgEx.h
  ./source/net/message/nodes/HeartbeatMsgEx.h
  ./source/net/message/nodes/GenericDebugMsgEx.h
  ./source/net/message/nodes/RemoveNodeMsgEx.h
  ./source/net/message/nodes/GetNodeCapacityPoolsMsgEx.h
  ./source/net/message/nodes/RefreshTargetStatesMsgEx.h
  ./source/net/message/nodes/SetMirrorBuddyGroupMsgEx.cpp
  ./source/net/message/nodes/RefreshCapacityPoolsMsgEx.h
  ./source/net/message/MirroredMessage.h
  ./source/net/message/storage/moving/RenameV2MsgEx.cpp
  ./source/net/message/storage/moving/RenameV2MsgEx.h
  ./source/net/message/storage/moving/MovingFileInsertMsgEx.cpp
  ./source/net/message/storage/moving/MovingDirInsertMsgEx.h
  ./source/net/message/storage/moving/MovingFileInsertMsgEx.h
  ./source/net/message/storage/moving/MovingDirInsertMsgEx.cpp
  ./source/net/message/storage/GetHighResStatsMsgEx.h
  ./source/net/message/storage/creating/MkFileWithPatternMsgEx.cpp
  ./source/net/message/storage/creating/MkLocalDirMsgEx.cpp
  ./source/net/message/storage/creating/MkFileWithPatternMsgEx.h
  ./source/net/message/storage/creating/RmDirEntryMsgEx.h
  ./source/net/message/storage/creating/UnlinkFileMsgEx.h
  ./source/net/message/storage/creating/MkFileMsgEx.cpp
  ./source/net/message/storage/creating/RmDirMsgEx.cpp
  ./source/net/message/storage/creating/MkLocalDirMsgEx.h
  ./source/net/message/storage/creating/MkFileMsgEx.h
  ./source/net/message/storage/creating/MkDirMsgEx.cpp
  ./source/net/message/storage/creating/UnlinkFileMsgEx.cpp
  ./source/net/message/storage/creating/MkDirMsgEx.h
  ./source/net/message/storage/creating/HardlinkMsgEx.h
  ./source/net/message/storage/creating/HardlinkMsgEx.cpp
  ./source/net/message/storage/creating/RmLocalDirMsgEx.cpp
  ./source/net/message/storage/creating/RmDirEntryMsgEx.cpp
  ./source/net/message/storage/creating/RmLocalDirMsgEx.h
  ./source/net/message/storage/creating/RmDirMsgEx.h
  ./source/net/message/storage/TruncFileMsgEx.h
  ./source/net/message/storage/mirroring/ResyncRawInodesMsgEx.cpp
  ./source/net/message/storage/mirroring/StorageResyncStartedMsgEx.cpp
  ./source/net/message/storage/mirroring/StorageResyncStartedMsgEx.h
  ./source/net/message/storage/mirroring/SetMetadataMirroringMsgEx.h
  ./source/net/message/storage/mirroring/ResyncSessionStoreMsgEx.h
  ./source/net/message/storage/mirroring/ResyncSessionStoreMsgEx.cpp
  ./source/net/message/storage/mirroring/GetMetaResyncStatsMsgEx.h
  ./source/net/message/storage/mirroring/SetMetadataMirroringMsgEx.cpp
  ./source/net/message/storage/mirroring/GetMetaResyncStatsMsgEx.cpp
  ./source/net/message/storage/mirroring/ResyncRawInodesMsgEx.h
  ./source/net/message/storage/attribs/RemoveXAttrMsgEx.cpp
  ./source/net/message/storage/attribs/UpdateDirParentMsgEx.h
  ./source/net/message/storage/attribs/RefreshEntryInfoMsgEx.h
  ./source/net/message/storage/attribs/StatMsgEx.h
  ./source/net/message/storage/attribs/RemoveXAttrMsgEx.h
  ./source/net/message/storage/attribs/ListXAttrMsgEx.h
  ./source/net/message/storage/attribs/GetEntryInfoMsgEx.h
  ./source/net/message/storage/attribs/SetAttrMsgEx.h
  ./source/net/message/storage/attribs/SetXAttrMsgEx.h
  ./source/net/message/storage/attribs/SetDirPatternMsgEx.h
  ./source/net/message/storage/attribs/ListXAttrMsgEx.cpp
  ./source/net/message/storage/attribs/SetDirPatternMsgEx.cpp
  ./source/net/message/storage/attribs/GetXAttrMsgEx.cpp
  ./source/net/message/storage/attribs/StatMsgEx.cpp
  # NOTE(review): every other handler here is named *MsgEx.cpp and the matching
  # header above is RefreshEntryInfoMsgEx.h; confirm this file name on disk.
  ./source/net/message/storage/attribs/RefreshEntryInfoMsg.cpp
  ./source/net/message/storage/attribs/GetEntryInfoMsgEx.cpp
  ./source/net/message/storage/attribs/GetXAttrMsgEx.h
  ./source/net/message/storage/attribs/UpdateDirParentMsgEx.cpp
  ./source/net/message/storage/attribs/SetAttrMsgEx.cpp
  ./source/net/message/storage/attribs/SetXAttrMsgEx.cpp
  ./source/net/message/storage/TruncFileMsgEx.cpp
  ./source/net/message/storage/GetHighResStatsMsgEx.cpp
  ./source/net/message/storage/lookup/FindOwnerMsgEx.cpp
  ./source/net/message/storage/lookup/FindLinkOwnerMsgEx.cpp
  ./source/net/message/storage/lookup/FindOwnerMsgEx.h
  ./source/net/message/storage/lookup/LookupIntentMsgEx.h
  ./source/net/message/storage/lookup/FindLinkOwnerMsgEx.h
  ./source/net/message/storage/lookup/LookupIntentMsgEx.cpp
  ./source/net/message/storage/chunkbalancing/ChunkBalanceMsgEx.cpp
  ./source/net/message/storage/chunkbalancing/StripePatternUpdateMsgEx.cpp
  ./source/net/message/storage/StatStoragePathMsgEx.h
  ./source/net/message/storage/StatStoragePathMsgEx.cpp
  ./source/net/message/storage/listing/ListDirFromOffsetMsgEx.h
  ./source/net/message/storage/listing/ListDirFromOffsetMsgEx.cpp
  ./source/net/message/storage/quota/SetExceededQuotaMsgEx.cpp
  ./source/net/message/storage/quota/SetExceededQuotaMsgEx.h
  ./source/net/message/storage/attribs/SetFilePatternMsgEx.h
  ./source/net/message/storage/attribs/SetFilePatternMsgEx.cpp
  ./source/net/message/storage/creating/UnlinkLocalFileInodeMsgEx.h
  ./source/net/message/storage/creating/UnlinkLocalFileInodeMsgEx.cpp
  ./source/net/message/storage/creating/MoveFileInodeMsgEx.h
  ./source/net/message/storage/creating/MoveFileInodeMsgEx.cpp
  ./source/net/message/fsck/UpdateFileAttribsMsgEx.cpp
  ./source/net/message/fsck/RemoveInodesMsgEx.h
  ./source/net/message/fsck/AdjustChunkPermissionsMsgEx.h
  ./source/net/message/fsck/FixInodeOwnersMsgEx.cpp
  ./source/net/message/fsck/AdjustChunkPermissionsMsgEx.cpp
  ./source/net/message/fsck/FsckSetEventLoggingMsgEx.h
  ./source/net/message/fsck/UpdateDirAttribsMsgEx.cpp
  ./source/net/message/fsck/FixInodeOwnersMsgEx.h
  ./source/net/message/fsck/UpdateFileAttribsMsgEx.h
  ./source/net/message/fsck/UpdateDirAttribsMsgEx.h
  ./source/net/message/fsck/FixInodeOwnersInDentryMsgEx.cpp
  ./source/net/message/fsck/RetrieveDirEntriesMsgEx.cpp
  ./source/net/message/fsck/DeleteDirEntriesMsgEx.h
  ./source/net/message/fsck/RetrieveFsIDsMsgEx.h
  ./source/net/message/fsck/CreateEmptyContDirsMsgEx.h
  ./source/net/message/fsck/RetrieveInodesMsgEx.cpp
  ./source/net/message/fsck/LinkToLostAndFoundMsgEx.h
  ./source/net/message/fsck/RetrieveInodesMsgEx.h
  ./source/net/message/fsck/RecreateDentriesMsgEx.cpp
  ./source/net/message/fsck/CreateDefDirInodesMsgEx.h
  ./source/net/message/fsck/LinkToLostAndFoundMsgEx.cpp
  ./source/net/message/fsck/RecreateFsIDsMsgEx.cpp
  ./source/net/message/fsck/FixInodeOwnersInDentryMsgEx.h
  ./source/net/message/fsck/FsckSetEventLoggingMsgEx.cpp
  ./source/net/message/fsck/RetrieveDirEntriesMsgEx.h
  ./source/net/message/fsck/DeleteDirEntriesMsgEx.cpp
  ./source/net/message/fsck/CreateDefDirInodesMsgEx.cpp
  ./source/net/message/fsck/RecreateDentriesMsgEx.h
  ./source/net/message/fsck/RecreateFsIDsMsgEx.h
  ./source/net/message/fsck/RemoveInodesMsgEx.cpp
  ./source/net/message/fsck/RetrieveFsIDsMsgEx.cpp
  ./source/net/message/fsck/CreateEmptyContDirsMsgEx.cpp
  ./source/net/message/fsck/CheckAndRepairDupInodeMsgEx.h
  ./source/net/message/fsck/CheckAndRepairDupInodeMsgEx.cpp
  ./source/net/msghelpers/MsgHelperMkFile.h
  ./source/net/msghelpers/MsgHelperTrunc.cpp
  ./source/net/msghelpers/MsgHelperXAttr.cpp
  ./source/net/msghelpers/MsgHelperStat.h
  ./source/net/msghelpers/MsgHelperLocking.h
  ./source/net/msghelpers/MsgHelperUnlink.h
  ./source/net/msghelpers/MsgHelperXAttr.h
  ./source/net/msghelpers/MsgHelperOpen.cpp
  ./source/net/msghelpers/MsgHelperClose.cpp
  ./source/net/msghelpers/MsgHelperClose.h
  ./source/net/msghelpers/MsgHelperTrunc.h
  ./source/net/msghelpers/MsgHelperUnlink.cpp
  ./source/net/msghelpers/MsgHelperMkFile.cpp
  ./source/net/msghelpers/MsgHelperStat.cpp
  ./source/net/msghelpers/MsgHelperOpen.h
  ./source/net/msghelpers/MsgHelperLocking.cpp
  ./source/components/FileEventLogger.h
  ./source/components/DisposalGarbageCollector.h
  ./source/components/DatagramListener.h
  ./source/components/InternodeSyncer.h
  ./source/components/ModificationEventFlusher.cpp
  ./source/components/ModificationEventFlusher.h
  ./source/components/InternodeSyncer.cpp
  ./source/components/DatagramListener.cpp
  ./source/components/worker/GetChunkFileAttribsWork.cpp
  ./source/components/worker/SetChunkFileAttribsWork.h
  ./source/components/worker/SetChunkFileAttribsWork.cpp
  ./source/components/worker/UnlinkChunkFileWork.h
  ./source/components/worker/BarrierWork.h
  ./source/components/worker/CloseChunkFileWork.h
  ./source/components/worker/GetChunkFileAttribsWork.h
  ./source/components/worker/UnlinkChunkFileWork.cpp
  ./source/components/worker/TruncChunkFileWork.cpp
  ./source/components/worker/CloseChunkFileWork.cpp
  ./source/components/worker/LockEntryNotificationWork.h
  ./source/components/worker/LockEntryNotificationWork.cpp
  ./source/components/worker/LockRangeNotificationWork.h
  ./source/components/worker/LockRangeNotificationWork.cpp
  ./source/components/worker/TruncChunkFileWork.h
  ./source/components/FileEventLogger.cpp
  ./source/components/DisposalGarbageCollector.cpp
  ./source/components/buddyresyncer/BuddyResyncer.cpp
  ./source/components/buddyresyncer/BuddyResyncJob.h
  ./source/components/buddyresyncer/BuddyResyncerBulkSyncSlave.cpp
  ./source/components/buddyresyncer/SessionStoreResyncer.cpp
  ./source/components/buddyresyncer/BuddyResyncerGatherSlave.cpp
  ./source/components/buddyresyncer/BuddyResyncerBulkSyncSlave.h
  ./source/components/buddyresyncer/SyncCandidate.h
  ./source/components/buddyresyncer/BuddyResyncerGatherSlave.h
  ./source/components/buddyresyncer/SessionStoreResyncer.h
  ./source/components/buddyresyncer/BuddyResyncJob.cpp
  ./source/components/buddyresyncer/SyncSlaveBase.cpp
  ./source/components/buddyresyncer/SyncSlaveBase.h
  ./source/components/buddyresyncer/BuddyResyncerModSyncSlave.h
  ./source/components/buddyresyncer/BuddyResyncerModSyncSlave.cpp
  ./source/components/buddyresyncer/BuddyResyncer.h
  ./source/session/LockingNotifier.cpp
  ./source/session/EntryLock.h
  ./source/session/EntryLockStore.cpp
  ./source/session/EntryLockStore.h
  ./source/session/SessionFile.h
  ./source/session/LockingNotifier.h
  ./source/session/Session.h
  ./source/session/SessionStore.cpp
  ./source/session/SessionFileStore.h
  ./source/session/SessionFile.cpp
  ./source/session/Session.cpp
  ./source/session/MirrorMessageResponseState.h
  ./source/session/SessionStore.h
  ./source/session/SessionFileStore.cpp
  ./source/session/MirrorMessageResponseState.cpp
  ./source/program/Program.h
  ./source/program/Program.cpp
  ./source/app/App.h
  ./source/app/App.cpp
  ./source/app/config/Config.h
  ./source/app/config/Config.cpp
  ./source/nodes/MetaNodeOpStats.h
  ./source/storage/DirInode.h
  ./source/storage/IncompleteInode.cpp
  ./source/storage/MetadataEx.h
  ./source/storage/Locking.h
  ./source/storage/DirEntryStore.cpp
  ./source/storage/DentryStoreData.h
  ./source/storage/FileInodeStoreData.h
  ./source/storage/InodeFileStore.cpp
  ./source/storage/PosixACL.cpp
  ./source/storage/IncompleteInode.h
  ./source/storage/Locking.cpp
  ./source/storage/MkFileDetails.h
  ./source/storage/MetaStore.cpp
  ./source/storage/InodeFileStore.h
  ./source/storage/FileInode.cpp
  ./source/storage/DiskMetaData.cpp
  ./source/storage/FileInode.h
  ./source/storage/InodeDirStore.cpp
  ./source/storage/DirEntry.cpp
  ./source/storage/SyncedDiskAccessPath.h
  ./source/storage/DirEntryStore.h
  ./source/storage/MetaStore.h
  ./source/storage/MetaStoreRename.cpp
  ./source/storage/NodeOfflineWait.h
  ./source/storage/InodeDirStore.h
  ./source/storage/DirInode.cpp
  ./source/storage/DiskMetaData.h
  ./source/storage/DirEntry.h
  ./source/storage/MetaFileHandle.h
  ./source/storage/FileInodeStoreData.cpp
  ./source/storage/PosixACL.h
)
# Libraries the meta static library is linked against.
target_link_libraries(meta beegfs-common dl pthread blkid)

# The daemon executable: the program entry point plus the meta library.
add_executable(beegfs-meta source/program/Main.cpp)
target_link_libraries(beegfs-meta meta)
# Unit tests are built and registered unless BEEGFS_SKIP_TESTS is set.
if(NOT BEEGFS_SKIP_TESTS)
# Test binary built from the sources under tests/.
add_executable(
test-meta
./tests/TestConfig.h
./tests/TestSerialization.h
./tests/TestSerialization.cpp
./tests/TestConfig.cpp
./tests/TestBuddyMirroring.cpp
)
# Link against the meta library and gtest_main (presumably GoogleTest's
# main()-providing library; the target is defined outside this file).
target_link_libraries(
test-meta
meta
gtest_main
)
# required for a test: copies the default config file into dist/etc/, a
# relative destination that CMake resolves against the current build directory.
file(
COPY ${CMAKE_CURRENT_SOURCE_DIR}/build/dist/etc/beegfs-meta.conf
DESTINATION dist/etc/
)
# Register the test binary with CTest.
# NOTE(review): the purpose of the "--compiler" argument is not visible here;
# confirm the test runner still understands it.
add_test(
NAME test-meta
COMMAND test-meta --compiler
)
endif()
# --- Install rules; everything below belongs to the "meta" component ---

# The daemon binary.
install(TARGETS beegfs-meta DESTINATION "usr/sbin" COMPONENT "meta")

# Setup helper script, installed as an executable program.
install(PROGRAMS "build/dist/sbin/beegfs-setup-meta" DESTINATION "usr/sbin" COMPONENT "meta")

# systemd unit files (plain and templated "@" variant).
# NOTE(review): the destination depends on CMAKE_INSTALL_LIBDIR, which is set
# outside this file; confirm it resolves to the intended systemd unit directory.
install(
  FILES
    "build/dist/usr/lib/systemd/system/beegfs-meta.service"
    "build/dist/usr/lib/systemd/system/beegfs-meta@.service"
  DESTINATION "${CMAKE_INSTALL_LIBDIR}/systemd/system"
  COMPONENT "meta"
)

# Default configuration file.
install(FILES "build/dist/etc/beegfs-meta.conf" DESTINATION "etc/beegfs" COMPONENT "meta")

# Wrapper script, installed under the name "beegfs-meta".
install(
  PROGRAMS "build/beegfs-meta.sh"
  RENAME "beegfs-meta"
  DESTINATION "opt/beegfs/sbin"
  COMPONENT "meta"
)

31
meta/build/Makefile Normal file
View File

@@ -0,0 +1,31 @@
# Build description for the BeeGFS metadata service.
# Pulls in the shared build Makefile; the build-static-library, define-dep-lib,
# build-executable and build-test macros used below are presumably defined there.
include ../../build/Makefile
# The program entry point is kept out of the library and only compiled into
# the executable.
main := ../source/program/Main.cpp
# All other .cpp files found under ../source (case-insensitive match).
sources := $(filter-out $(main), $(shell find ../source -iname '*.cpp'))
# Static library "Meta" built from the sources above.
$(call build-static-library,\
Meta,\
$(sources),\
common dl blkid uuid nl3-route,\
../source)
# Declare "Meta" as a dependency library: include path plus archive location.
$(call define-dep-lib,\
Meta,\
-I ../source,\
$(build_dir)/libMeta.a)
# The beegfs-meta executable: entry point plus the Meta library and its dependencies.
$(call build-executable,\
beegfs-meta,\
$(main),\
Meta common dl blkid uuid nl3-route)
# Test runner built from every .cpp file under ../tests.
$(call build-test,\
test-runner,\
$(shell find ../tests -name '*.cpp'),\
Meta common dl blkid uuid nl3-route,\
../tests)
# enable special reference DirInode debug code
# (adds -DBEEGFS_DEBUG_RELEASE_DIR when BEEGFS_DEBUG_RELEASE_DIR is non-empty)
ifneq ($(BEEGFS_DEBUG_RELEASE_DIR),) # extra release dir debugging
CXXFLAGS += -DBEEGFS_DEBUG_RELEASE_DIR
endif

614
meta/build/dist/etc/beegfs-meta.conf vendored Normal file
View File

@@ -0,0 +1,614 @@
# This is a config file for BeeGFS metadata nodes.
# http://www.beegfs.com
# --- [Table of Contents] ---
#
# 1) Settings
# 2) Command Line Arguments
# 3) Basic Settings Documentation
# 4) Advanced Settings Documentation
#
# --- Section 1.1: [Basic Settings] ---
#
sysMgmtdHost =
storeMetaDirectory =
storeAllowFirstRunInit = true
storeFsUUID =
#
# --- Section 1.2: [Advanced Settings] ---
#
connAuthFile = /etc/beegfs/conn.auth
connDisableAuthentication = false
connBacklogTCP = 128
connFallbackExpirationSecs = 900
connInterfacesFile =
connMaxInternodeNum = 32
connMetaPort = 8005
connMgmtdPort = 8008
connPortShift = 0
connNetFilterFile =
connUseRDMA = true
connRDMATypeOfService = 0
connTcpOnlyFilterFile =
logType = syslog
logLevel = 3
logNoDate = false
logNumLines = 50000
logNumRotatedFiles = 5
logStdFile = /var/log/beegfs-meta.log
runDaemonized = true
storeClientXAttrs = false
storeClientACLs = false
storeUseExtendedAttribs = true
sysTargetAttachmentFile =
sysTargetOfflineTimeoutSecs = 180
sysAllowUserSetPattern = false
tuneBindToNumaZone =
tuneNumStreamListeners = 1
tuneNumWorkers = 0
tuneTargetChooser = randomized
tuneUseAggressiveStreamPoll = false
tuneUsePerUserMsgQueues = false
#
# --- Section 2: [Command Line Arguments] ---
#
# Use the command line argument "cfgFile=/etc/anotherconfig.conf" to
# specify a different config file for beegfs-meta.
# All other options in this file can also be used as command line
# arguments, overriding the corresponding config file values.
#
# --- Section 3: [Basic Settings Documentation] ---
#
# [sysMgmtdHost]
# Hostname (or IP) of the host running the management service.
# (See also "connMgmtdPort")
# Default: <none>
# [storeMetaDirectory]
# The absolute path and name of a directory where the file system can store its
# metadata.
# Default: <none>
# [storeFsUUID]
# Requires the underlying file system of the metadata directory to have the same
# UUID as set here. This prevents the meta node from accidentally starting from the
# wrong device, e.g. when it is not properly mounted. To find the UUID to
# put here, you can, for example, use blkid:
#
# blkid -s UUID
#
# This will output all devices on the host with their file system's UUID (if there
# is one). Choose the correct one and copy it here. This command needs to be run
# as root.
#
# If left empty, the check is skipped. It is highly recommended to enable this check
# after installation to prevent data corruption.
# Default: <none>
# [storeAllowFirstRunInit]
# Enables or disables daemon startup with an uninitialized storage directory.
# This can be used to make sure that the daemon does not run when the storage
# partition is not mounted (e.g. because it needs repair after a power outage).
# Note: This setting must be enabled during first startup of the daemon, but
# may be disabled afterwards.
# Default: true
#
# --- Section 4: [Advanced Settings Documentation] ---
#
#
# --- Section 4.1: [Connections & Communication] ---
#
# [connAuthFile]
# The path to a file that contains a shared secret for connection based
# authentication. Only peers that use the same shared secret will be able to
# connect.
# Default: <none>
# [connDisableAuthentication]
# If set to true, explicitly disables connection authentication and allows the
# service to run without a connAuthFile. Running BeeGFS without connection
# authentication is considered insecure and is not recommended.
# Default: false
# [connBacklogTCP]
# The TCP listen backlog.
# Default: 128
# [connFallbackExpirationSecs]
# The time in seconds after which a connection to a fallback interface expires.
# When a fallback connection expires, the system will try to establish a new
# connection to the other host's primary interface (falling back to another
# interface again if necessary).
# Note: The priority of node interfaces can be configured using the
# "connInterfacesFile" parameter.
# Default: 900
# [connInterfacesFile]
# The path to a text file that specifies the names of the interfaces which
# may be used for communication by other nodes. One interface per line. The
# line number also defines the priority of an interface.
# Example: "ib0" in the first line, "eth0" in the second line.
# Values: This setting is optional. If unspecified, all available interfaces
# will be published and priorities will be assigned automatically.
# Note: This information is sent to other hosts to inform them about possible
# communication paths. See connRestrictOutboundInterfaces for this
# configuration's potential effect on outbound connections.
# Default: <none>
# [connInterfacesList]
# Comma-separated list of interface names. Performs the same function as
# connInterfacesFile.
# Default: <none>
# [connRestrictOutboundInterfaces]
# The default behavior of BeeGFS is to use any available network interface
# to establish an outbound connection to a node, according to the TCP/IP
# configuration of the operating system. When connRestrictOutboundInterfaces
# is set to true, the network interfaces used for outbound connections are
# limited to the values specified by connInterfacesFile or connInterfacesList.
# The operating system routing tables are consulted to determine which
# interface to use for a particular node's IP address. If there is no
# route from the configured interfaces that is suitable for a node's IP
# addresses then the connection will fail to be established.
# Default: false
# [connNoDefaultRoute]
# When connRestrictOutboundInterfaces is true, the routing logic would use
# the default route for a Node's IP address when no specific route for that
# address is found in the routing tables. This can be problematic during a
# failure situation, as the default route is not appropriate to use for a
# subnet that is accessible from an interface that has failed.
# connNoDefaultRoute is a comma-separated list of CIDRs that should never
# be accessed via the default route.
# Default: 0.0.0.0/0. This prevents the default route from ever being used.
# [connMaxInternodeNum]
# The maximum number of simultaneous connections to the same node.
# Default: 32
# [connMetaPort]
# The UDP and TCP port of the metadata node.
# Default: 8005
# [connMgmtdPort]
# The UDP and TCP port of the management node.
# Default: 8008
# [connPortShift]
# Shifts all following UDP and TCP ports according to the specified value.
# Intended to make port configuration easier in case you do not want to
# configure each port individually.
# Default: 0
# [connNetFilterFile]
# The path to a text file that specifies allowed IP subnets, which may be used
# for outgoing communication. One subnet per line in classless notation (IP
# address and number of significant bits).
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
# line.
# Values: This setting is optional. If unspecified, all addresses are allowed
# for outgoing communication.
# Default: <none>
# [connTCPRcvBufSize], [connUDPRcvBufSize]
# Sets the size for TCP and UDP socket receive buffers (SO_RCVBUF). The maximum
# allowed value is determined by sysctl net.core.rmem_max. This value is
# ignored if it is less than the default value determined by
# net.core.rmem_default.
# For legacy reasons, the default value 0 indicates that the buffer size is set
# to connRDMABufNum * connRDMABufSize.
# -1 indicates that the buffer size should be left at the system default.
# Default: 0
# [connUseRDMA]
# Enables the use of Remote Direct Memory Access (RDMA) for Infiniband.
# This setting only has effect if libbeegfs-ib is installed.
# Default: true
# [connRDMABufNum], [connRDMABufSize]
# Infiniband RDMA buffer settings.
# connRDMABufSize is the maximum size of a buffer (in bytes) that will be sent
# over the network; connRDMABufNum is the number of available buffers that can
# be in flight for a single connection. These client settings are also applied
# on the server side for each connection.
# Note: RAM usage per connection is connRDMABufSize x connRDMABufNum x 2. Keep
# resulting RAM usage (x connMaxInternodeNum x number_of_clients) on the
# server in mind when increasing these values.
# Note: The client needs to allocate physically contiguous pages for
# connRDMABufSize, so this setting shouldn't be higher than a few kbytes.
# Default: 8192, 70
# [connRDMATypeOfService]
# Infiniband provides the option to set a type of service for an application.
# This type of service can be used by your subnet manager to provide Quality of
# Service functionality (e.g. setting different service levels).
# In openSM the service type will be mapped to the parameter qos-class, which
# can be handled in your QoS configuration.
# See
# www.openfabrics.org/downloads/OFED/ofed-1.4/OFED-1.4-docs/
# QoS_management_in_OpenSM.txt
# for more information on how to configure openSM for QoS.
# This parameter sets the type of service for all outgoing connections of this
# daemon.
# Default: 0 (Max: 255)
# [connTcpOnlyFilterFile]
# The path to a text file that specifies IP address ranges to which no RDMA
# connection should be established. This is useful e.g. for environments where
# all hosts support RDMA, but some hosts cannot connect via RDMA to some other
# hosts.
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
# line.
# Values: This setting is optional.
# Default: <none>
# [connMessagingTimeouts]
# These constants are used to set some of the connection timeouts for sending
# and receiving data between services in the cluster. They used to be hard-coded
# (CONN_LONG_TIMEOUT, CONN_MEDIUM_TIMEOUT and CONN_SHORT_TIMEOUT) but are now
# made configurable for experimentation purposes.
# This option takes three integer values of milliseconds, separated by a comma
# in the order long, medium, short.
# WARNING: This is an EXPERIMENTAL configuration option that should not be
# changed in production environments unless properly tested and validated.
# Some configurations can lead to service lockups and other subtle issues.
# Please make sure that you know exactly what you are doing and properly
# test any changes you make.
# Default: 600000,90000,30000
# [connRDMATimeouts]
# These constants are used to set some of the timeouts for sending and receiving
# data between services in the cluster via RDMA. They used to be
# hard-coded IBVSOCKET_CONN_TIMEOUT_MS, IBVSOCKET_FLOWCONTROL_ONSEND_TIMEOUT_MS
# and a 10000 literal for poll timeout but are now made configurable for
# experimentation purposes.
# This option takes three integer values of milliseconds, separated by a comma
# in the order connectMS, flowSendMS and pollMS.
# WARNING: This is an EXPERIMENTAL configuration option that should not be
# changed in production environments unless properly tested and validated.
# Some configurations can lead to service lockups and other subtle issues.
# Please make sure that you know exactly what you are doing and properly
# test any changes you make.
# Default: 3000,180000,7500
#
# --- Section 4.2: [Logging] ---
#
# [logType]
# Defines the logger type. This can either be "syslog" to send log messages to
# the general system logger or "logfile". If set to logfile logs will be written
# to logStdFile.
# Default: logfile
# [logLevel]
# Defines the amount of output messages. The higher this level, the more
# detailed the log messages will be.
# Note: Levels above 3 might decrease performance.
# Default: 3 (Max: 5)
# [logNoDate]
# Defines whether "date & time" (=false) or the current "time only" (=true)
# should be logged.
# Default: false
# [logNumLines]
# The maximum number of lines per log file.
# Default: 50000
# [logNumRotatedFiles]
# The number of old files to keep when "logNumLines" is reached and the log file
# is rewritten (log rotation).
# Default: 5
# [logStdFile]
# The path and filename of the log file for standard log messages. The parameter
# will be considered only if logType value is not equal to syslog. If no name
# is specified, the messages will be written to the console.
# Default: /var/log/beegfs-meta.log
#
# --- Section 4.3: [Startup] ---
#
# [runDaemonized]
# Detach the process from its parent (and from stdin/-out/-err).
# Default: true
#
# --- Section 4.4: [Storage] ---
#
# [storeClientXAttrs]
# Enables client-side extended attributes.
# Note: Can only be enabled if the underlying file system supports extended
# attributes.
# Note: This setting has to be explicitly enabled on the clients as well.
# Default: false
# [storeClientACLs]
# Enables the handling and storage of client-side access control lists.
# As ACLs are stored as extended attributes, this setting mainly concerns the
# enforcement and server-side propagation of directory default ACLs.
# Note: This setting can only be enabled if storeClientXAttrs is set to true.
# Note: This setting has to be explicitly enabled on all clients as well.
# Note: Enabling this setting can affect metadata performance.
# Default: false
# [storeUseExtendedAttribs]
# Controls whether BeeGFS metadata is stored as normal file contents (=false)
# or as extended attributes (=true) on the underlying file system. Depending on
# the type and version of your underlying local file system, extended attributes
# typically are significantly faster.
# Note: This setting can only be configured at first startup and cannot be
# changed afterwards.
# Default: true
#
# --- Section 4.5: [System Settings] ---
#
# [sysTargetAttachmentFile]
# This file provides a specification of which targets should be grouped within
# the same domain for randominternode target chooser. This is useful
# e.g. if randominternode is used with multiple storage daemon
# instances running on the same physical hosts when files should be striped
# across different physical hosts.
# Format: Line-separated <targetID>=<domainID> definition.
# Example: "101=1" in first line, "102=1" in second line, "201=2" in third
# line to define that targets "101" and "102" are part of the same
# domain "1", while target "201" is part of a different domain "2". The
# domain IDs in this file are arbitrary values in range 1..65535, the
# targetIDs are actual targetIDs as in "beegfs-ctl --listtargets".
# Default: <none>
# [sysTargetOfflineTimeoutSecs]
# Timeout until the metadata nodes and storage targets are considered offline
# when no target state updates can be fetched from that node.
# Note: This must be the same value as in the /etc/beegfs/beegfs-mgmtd.conf on
# the management node.
# Values: time in seconds
# Default: 180
# [sysAllowUserSetPattern]
# If set to true, non-privileged users are allowed to modify stripe pattern
# settings for directories they own.
# Default: false
# [sysFileEventLogTarget]
# If set, the metadata server will log modification events (which it receives
# from clients) to a Unix Socket specified here. External tools may listen on
# this socket and process the information.
# Note: Each event will be logged in the following format:
# droppedSeqNo (64bit) - missedSeqNo (64bit) - eventType (32bit) - ModifiedPath
# Increased dropped sequence numbers indicate communication errors. Increased
# missed sequence numbers indicate that an event could not be properly reported.
# The following event types are possible:
# 0(file contents flushed), 1(truncate), 2(set attribute), 3(file closed),
# 4(create), 5(mkdir), 6(mknode), 7(create symlink), 8(rmdir), 9(unlink),
# 10(create hardlink), 11(rename), 12(read)
# Default: <unset>
# Example: sysFileEventLogTarget = unix:/run/beegfs/eventlog
# [sysFileEventPersistDirectory]
# If set, the metadata server will persist modification events to this
# directory. If unset, will persist to "eventq" subdirectory under
# metadata-root.
# When the metadata server starts up, it will try to create that directory
# (non-recursive mkdir()) and initialize a new event persist store in this
# directory.
# If creating the directory fails with EEXIST (directory exists) it will assume
# an existing persist store and try to load it.
# Default: <unset> (storeMetaDirectory + "/eventq")
# [sysFileEventPersistSize]
# If event logging is enabled (see sysFileEventLogTarget), this control
# explicitly sets the size (in bytes) for creating the chunk-store file in the
# eventq directory (sysFileEventPersistDirectory).
# The chunk-store is a file containing a ringbuffer where events get
# persisted. The events will eventually be delivered to downstream services
# (for example bee-watch, hive-index).
# Note that this value has no effect when loading an existing queue directory.
# Default: 0 (use internal defaults / guesswork)
# Example: sysFileEventPersistSize = 2g # 2 Gigabytes
# Note: the value will be rounded up to the next power of 2.
#
# --- Section 4.6: [Tuning] ---
#
# [tuneBindToNumaZone]
# Defines the zero-based NUMA zone number to which all threads of this process
# should be bound. If unset, all available CPU cores may be used.
# Zone binding is especially useful if the corresponding devices (e.g. storage
# controller and network card) are also attached to the same zone.
# Note: The Linux kernel shows NUMA zones at /sys/devices/system/node/nodeXY
# Default: <unset>
# [tuneNumStreamListeners]
# The number of threads waiting for incoming data events. Connections with
# incoming data will be handed over to the worker threads for actual message
# processing.
# Default: 1
# [tuneNumWorkers]
# The number of worker threads. Higher number of workers allows the server to
# handle more client requests in parallel. On dedicated metadata servers, this
# is typically set to a value between four and eight times the number of CPU
# cores.
# Note: 0 means use twice the number of CPU cores (but at least 4).
# Default: 0
# [tuneTargetChooser]
# The algorithm to choose storage targets for file creation.
# Values:
# * randomized: choose targets in a random fashion.
# * roundrobin: choose targets in a deterministic round-robin fashion.
# (Use this only for benchmarking of large-file streaming throughput.)
# * randomrobin: randomized round-robin; choose targets in a deterministic
# round-robin fashion, but random shuffle the result targets list.
# * randominternode: choose random targets that are assigned to different
# storage nodeIDs. (See sysTargetAttachmentFile if multiple storage
# daemon instances are running on the same physical host.)
# Note: Only the randomized chooser honors client's preferred nodes/targets
# settings.
# Default: randomized
# [tuneUseAggressiveStreamPoll]
# If set to true, the StreamListener component, which waits for incoming
# requests, will keep actively polling for events instead of sleeping until
# an event occurs. Active polling will reduce latency for processing of
# incoming requests at the cost of higher CPU usage.
# Default: false
# [tuneUsePerUserMsgQueues]
# If set to true, per-user queues will be used to decide which of the pending
# requests is handled by the next available worker thread. If set to false,
# a single queue will be used and incoming requests will be processed in
# first-come, first-served order.
# Per-user queues are intended to improve fairness in multi-user environments.
# Default: false
# [tuneWorkerBufSize]
# The buffer size, which is allocated twice by each worker thread for IO and
# network data buffering.
# Note: For optimal performance, this value must be at least 128k.
# Default: 1m
# [tuneNumResyncSlaves]
# The number of threads used to perform the bulk synchronizations for a buddy
# mirror resync.
# Default: 12
#
# --- Section 4.7: [Quota settings] ---
#
# [quotaEnableEnforcement]
# Enables enforcement of user and group quota limits by periodically checking
# if the limits are exceeded.
# Note: This uses quota information provided by the underlying local file
# systems of the storage targets.
# Note: Set quota limits with "beegfs-ctl --setquota".
# Note: If this option is true, performance might be slightly decreased due to
# extra information tracking.
# Note: Must be set to the same value in storage servers and mgmtd to be
# effective.
# Default: false
#
# --- Section 5: [Expert options] ---
#
# [storeSelfHealEmptyFiles]
# Delete metadata entries with no content and handle them as though they had
# not existed. Metadata entries with no content can be created by backup
# software that has incorrectly saved or restored metadata.
# Default: true
# [tuneNumCommSlaves]
# Number of threads dedicated to parallel communication with other nodes.
# Default: 2 * tuneNumWorkers
# [tuneCommSlaveBufSize]
# Buffer size used by communication threads, analogous to tuneWorkerBufSize.
# Default: 1m
# [tuneDefaultChunkSize], [tuneDefaultNumStripeTargets]
# Chunk size and number of targets to use when creating the root directory.
# Files and directories inherit these settings from their parents.
# Default: 512k, 4
# [tuneProcessFDLimit]
# Sets the maximum number of files the server can open. If the process rlimit
# is already larger than this number the limit will not be decreased.
# Default: 50000
# [tuneWorkerNumaAffinity]
# Distributes worker threads equally among NUMA nodes on the system when set.
# Default: false
# [tuneListenerNumaAffinity]
# Distributes listener threads equally among NUMA nodes on the system when set.
# Default: false
# [tuneListenerPrioShift]
# Applies a niceness offset to listener threads. Negative values will decrease
# niceness (increase priority), positive values will increase niceness (decrease
# priority).
# Default: -1
# [tuneDirMetadataCacheLimit]
# Number of recently used directory structures to keep in memory.
# Increasing this value may reduce memory allocations and disk I/O.
# Default: 1024
# [tuneLockGrantWaitMS], [tuneLockGrantNumRetries]
# Acknowledgement wait parameters for lock grant messages.
# Locks that are granted asynchronously (ie a client is waiting on the lock)
# notify waiting clients with UDP packets. For each waiter a notification
# packet is sent and the server waits for tuneLockGrantWaitMS to receive an
# acknowledgement from the client. This process is repeated up to
# tuneLockGrantNumRetries times.
# Default: 333, 15
# [tuneRotateMirrorTargets]
# Choose mirror targets for RAID10 patterns by rotating the selected targets.
# Default: false
# [tuneEarlyUnlinkResponse]
# Respond to unlink messages before chunk files have been unlinked.
# Default: true
# [tuneMirrorTimestamps]
# When buddy mirroring, mirror timestamps as exactly as possible. When this is
# set to `false` timestamps of mirrored files may be incorrect after a failover
# has occurred. Disabling timestamp mirroring gives a slight performance boost.
# Default: true
# [tuneDisposalGCPeriod]
# If > 0, disposal files will not be removed instantly. Instead, a garbage collector
# will run on each meta node. This sets the wait time in seconds between runs.
# Default: 0
# [quotaEarlyChownResponse]
# Respond to client chown() requests before chunk files have been changed.
# Quota relies on chunk files having the owner and group information stored in
# metadata. Therefore, setting this to true creates a short time window after
# a chown where the application and the servers have a different view on quota.
# Default: true
# [pidFile]
# Creates a PID file for the daemon when set. Set by init scripts.
# Default: <none>

29
meta/build/dist/etc/default/beegfs-meta vendored Normal file
View File

@@ -0,0 +1,29 @@
# BeeGFS metadata service configuration.
# Note: This file is only used together with sysV init scripts.
# If your system uses systemd, this file is ignored.
# In this case:
#
# - use `systemctl enable / disable` to activate / deactivate a service
#
# - systemd service templates are used for multimode
# (See https://www.beegfs.io/wiki/MultiMode)
#
#
# Set to "NO" to disable start of the BeeGFS metadata daemon via the init
# script.
START_SERVICE="YES"
# Set to "YES" if you want to start multiple metadata daemons with different
# configuration files on this machine.
#
# Create a subdirectory with the ending ".d" in "/etc/beegfs/" for every config
# file. The subdirectory name will be used to identify a particular server
# instance for init script start/stop.
#
# Note: The original config file in /etc/beegfs will not be used when multi-mode
# is enabled.
#
# Example: /etc/beegfs/scratch.d/beegfs-meta.conf
# $ /etc/init.d/beegfs-meta start scratch
MULTI_MODE="NO"

247
meta/build/dist/sbin/beegfs-setup-meta vendored Executable file
View File

@@ -0,0 +1,247 @@
#!/bin/bash
# License: BeeGFS EULA
# constant definitions
# (for configurables see below)
DEFAULT_CFG_PATH="/etc/beegfs/beegfs-meta.conf"
STORAGE_PATH_CFG_KEY="storeMetaDirectory"
MGMTD_CFG_KEY="sysMgmtdHost"
ALLOW_INIT_CFG_KEY="storeAllowFirstRunInit"
FS_UUID_CFG_KEY="storeFsUUID"
SERVER_NUMID_FILE="nodeNumID"
FORMAT_FILENAME="format.conf"
FORMAT_FILE_VERSION="4"
XATTR_CFG_KEY="storeUseExtendedAttribs"
# Print the help text (description, usage, options, notes and examples) to stdout.
# The script name is taken from $0 and the default config path from DEFAULT_CFG_PATH.
print_usage()
{
   cat <<EOF

DESCRIPTION: Initialize metadata storage directory for beegfs-meta server daemon
and update the beegfs-meta config file.

USAGE: $(basename $0) -p <metadata_path> [options]

 Mandatory Options:

 -p <path> - Path to metadata storage directory that is to be initialized.
 (Path will also be added to config file.)

 Recommended Options:

 -s <num> - Assign the given numeric ID to the server of this storage
 directory (range 1..65535). (Default: Randomly select a free ID.)

 -m <host> - Hostname (or IP address) of management server.
 (Will be stored in server config file.)

 Other Options:

 -C - Do not update server config file.

 -c <path> - Path to server config file.
 (Default: ${DEFAULT_CFG_PATH})

 -f - Force actions, ignore warnings.

 -h - Print this help.

 -u - Do not disable usage of uninitialized storage directory in config
 file and do not store the UUID of the underlying fs.

 -x - Do not store metadata as extended attributes.

NOTES:
 * All given IDs must be unique in their service class for the whole file system
 instance, i.e. there can only be one beegfs-meta service with ID 2, but there
 can also be a beegfs-storage service with ID 2 in the file system.

 * BeeGFS servers can also run without pre-initializing storage directories, if
 storeAllowFirstRunInit=true is set in the server config files (which is
 usually not recommended).

EXAMPLES:
 * Numeric IDs can generally be chosen arbitrarily. However, it is usually a
 good idea to pick a numeric ID that matches the hostname, e.g. if the
 hostname is "meta02", you would use "2" as numeric ID for the beegfs-meta
 service on this server.

 * Example 1) Initialize metadata storage directory of first metadata server and
 set "storage01" as management daemon host in config file:
 $ $(basename $0) -p /mnt/myraid1/beegfs-meta -s 1 -m storage01

EOF
}
# initialize storage directory (if enabled)
#
# Creates ${STORAGE_PATH} (including parents), refuses to continue if it already
# contains entries other than "lost+found" (unless FORCE_ACTIONS is set), then
# writes the format file and, if SERVER_NUMID is set, the numeric ID file.
#
# Reads:  STORAGE_PATH, FORCE_ACTIONS, FORMAT_FILENAME, FORMAT_FILE_VERSION,
#         FORMAT_USE_XATTR, SERVER_NUMID, SERVER_NUMID_FILE
# Sets:   FORMAT_FILE_PATH
# Exits:  with code 1 if the directory is not empty and '-f' was not given.
init_storage_dir()
{
   # check if storage path is defined
   if [ -z "${STORAGE_PATH}" ]; then
      return 0
   fi

   # create storage path
   echo "Preparing storage directory: ${STORAGE_PATH}"
   mkdir -p "${STORAGE_PATH}"

   # make sure storage dir is empty
   # (the path is quoted so that directories containing whitespace or glob
   # characters are listed as a single argument)
   if [ -z "${FORCE_ACTIONS}" ] && [ "$(ls -AI lost+found "${STORAGE_PATH}" )" ]; then
      echo " * ERROR: Storage directory is not empty. Initialization of non-empty" \
         "directories can lead to data loss or orphaned data. ('-f' disables this check.)"
      exit 1
   fi

   # create format file
   # NOTE(review): the file is appended to, not truncated; with '-f' on a directory
   # that already has a format file this adds a second set of lines - confirm that
   # this is intended.
   echo " * Creating ${FORMAT_FILENAME} file..."

   FORMAT_FILE_PATH="${STORAGE_PATH}/${FORMAT_FILENAME}"
   {
      echo "# This file was auto-generated. Do not modify it!"
      echo "version=${FORMAT_FILE_VERSION}"
      echo "xattr=${FORMAT_USE_XATTR}"
   } >> "${FORMAT_FILE_PATH}"

   # create ID files
   if [ -n "${SERVER_NUMID}" ]; then
      echo " * Creating server numeric ID file: ${STORAGE_PATH}/${SERVER_NUMID_FILE}"
      echo "${SERVER_NUMID}" > "${STORAGE_PATH}/${SERVER_NUMID_FILE}"
   fi
}
# update config file (if enabled)
#
# Rewrites the values of already existing keys in ${CFG_PATH} in place (sed -i);
# keys that are not present in the file are not added. Depending on which
# variables are set, this updates the management host, the storage directory,
# the first-run-init switch together with the file system UUID, and the
# extended attributes switch.
#
# Reads:  CFG_PATH, MGMTD_HOST, STORAGE_PATH, DISABLE_UNINITED_TARGETS,
#         FORMAT_USE_XATTR and the *_CFG_KEY constants
# Sets:   DEVICE, UUID
# Exits:  with code 1 if the config file does not exist.
update_config_file()
{
   # check if config file is defined
   if [ -z "${CFG_PATH}" ]; then
      return 0
   fi

   echo "Updating config file: ${CFG_PATH}"

   if [ ! -f "${CFG_PATH}" ]; then
      echo " * ERROR: Config file not found: ${CFG_PATH}"
      exit 1
   fi

   if [ -n "${MGMTD_HOST}" ]; then
      echo " * Setting management host: ${MGMTD_HOST}"
      sed -i "s/\(^${MGMTD_CFG_KEY}.*=\).*/\1 ${MGMTD_HOST}/" "${CFG_PATH}"
   fi

   if [ -n "${STORAGE_PATH}" ]; then
      echo " * Setting storage directory in config file..."
      # '|' as delimiter because the path contains slashes
      sed -i "s|\(^${STORAGE_PATH_CFG_KEY}.*=\).*$|\1 ${STORAGE_PATH}|" "${CFG_PATH}"
   fi

   if [ -n "${DISABLE_UNINITED_TARGETS}" ] && [ -n "${STORAGE_PATH}" ]; then
      echo " * Disabling usage of uninitialized storage directory in config file..."
      sed -i "s/\(^${ALLOW_INIT_CFG_KEY}.*=\).*/\1 false/" "${CFG_PATH}"

      echo " * Fetching the underlying device..."
      # -P keeps each file system on a single output line, so that the first
      # space-separated field of the last line is always the device name
      DEVICE=$(df -P "${STORAGE_PATH}" | tail -n1 | cut -d' ' -f1)
      echo "Underlying device detected: ${DEVICE}"

      echo "Fetching UUID of the file system on that device..."
      UUID=$(blkid -s UUID "${DEVICE}" | cut -d= -f2 | sed "s/\"//g")
      if [ -z "${UUID}" ]; then
         echo " * WARNING: Could not determine a file system UUID for device: ${DEVICE}" >&2
      fi
      echo "Found UUID ${UUID}"

      echo "Writing UUID to config file..."
      sed -i "s|\(^${FS_UUID_CFG_KEY}.*=\).*$|\1 ${UUID}|" "${CFG_PATH}"
   fi

   if [ -n "${STORAGE_PATH}" ]; then
      echo " * Setting usage of extended attributes to: ${FORMAT_USE_XATTR}"
      sed -i "s/\(^${XATTR_CFG_KEY}.*=\).*/\1 ${FORMAT_USE_XATTR}/" "${CFG_PATH}"
   fi
}
################## end of function definitions ##############
# configurable values and their defaults
# (for constants see above)
CFG_PATH="$DEFAULT_CFG_PATH" # empty path means "don't update cfg file"
FORCE_ACTIONS=""             # non-empty: skip the "directory not empty" check
FORMAT_USE_XATTR="true"      # written to the format file and to the config file
MGMTD_HOST=""                # empty: leave management host in config untouched
SERVER_NUMID=""              # empty: do not create a numeric ID file
STORAGE_PATH=""              # mandatory, set by -p
DISABLE_UNINITED_TARGETS="1" # non-empty: disable first-run init and store fs UUID

# parse command line arguments
# (see print_usage() for description of parameters)
while getopts "Cc:fhm:p:S:s:ux" opt; do
   case $opt in
      C)
         CFG_PATH=""
         ;;
      c)
         CFG_PATH="$OPTARG"
         ;;
      f)
         FORCE_ACTIONS="1"
         ;;
      h)
         print_usage
         exit 0
         ;;
      m)
         MGMTD_HOST="$OPTARG"
         ;;
      p)
         STORAGE_PATH="$OPTARG"
         ;;
      S)
         # accepted for backwards compatibility only; the argument is ignored
         echo "WARNING: The -S flag previously used to specify a string ID has been deprecated and now has no effect. String IDs were replaced with aliases configured using BeeGFS CTL." >&2
         ;;
      s)
         SERVER_NUMID="$OPTARG"
         ;;
      u)
         DISABLE_UNINITED_TARGETS=""
         ;;
      x)
         FORMAT_USE_XATTR="false"
         ;;
      *)
         echo "ERROR: Invalid argument" >&2
         print_usage
         exit 1
         ;;
   esac
done
# from here on, abort as soon as a command fails
set -e

# called without any arguments: show the help text and fail
[ $# -ne 0 ] || { print_usage; exit 1; }

# the storage directory (-p) is mandatory
[ -n "${STORAGE_PATH}" ] || {
   echo "ERROR: Storage directory is undefined." >&2
   echo
   print_usage
   exit 1
}

# first prepare the storage directory, then adapt the config file
init_storage_dir
update_config_file

echo "All done."

View File

@@ -0,0 +1,14 @@
# systemd unit for a single BeeGFS metadata service instance that uses the
# default config file /etc/beegfs/beegfs-meta.conf.
[Unit]
Description=BeeGFS Metadata Server
Documentation=http://www.beegfs.com/content/documentation/
Requires=network-online.target
# We disable the wants service, because it spams the log files
#Wants=beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service
# After= only orders startup relative to the listed units; it does not pull
# them in (see the disabled Wants= above).
After=network-online.target beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service zfs.target
[Service]
# The daemon is started with runDaemonized=false and the unit is Type=simple,
# i.e. systemd treats the started process itself as the main service process.
ExecStart=/opt/beegfs/sbin/beegfs-meta cfgFile=/etc/beegfs/beegfs-meta.conf runDaemonized=false
Type=simple
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,14 @@
# systemd template unit for running several BeeGFS metadata service instances
# on one host (multimode); each instance reads its own config file from
# /etc/beegfs/<instance>.d/beegfs-meta.conf.
[Unit]
Description=BeeGFS Metadata Server (multimode)
Documentation=http://www.beegfs.com/content/documentation/
Requires=network-online.target
# We disable the wants service, because it spams the log files
#Wants=beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service
# After= only orders startup relative to the listed units; it does not pull
# them in (see the disabled Wants= above).
After=network-online.target beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service zfs.target
[Service]
# %I is the systemd specifier for the (unescaped) instance name of this
# template unit; it selects the per-instance config directory.
# The daemon is started with runDaemonized=false and the unit is Type=simple,
# i.e. systemd treats the started process itself as the main service process.
ExecStart=/opt/beegfs/sbin/beegfs-meta cfgFile=/etc/beegfs/%I.d/beegfs-meta.conf runDaemonized=false
Type=simple
[Install]
WantedBy=multi-user.target

1656
meta/source/app/App.cpp Normal file

File diff suppressed because it is too large Load Diff

511
meta/source/app/App.h Normal file
View File

@@ -0,0 +1,511 @@
#pragma once
#include <app/config/Config.h>
#include <common/Common.h>
#include <common/app/log/LogContext.h>
#include <common/app/log/Logger.h>
#include <common/app/AbstractApp.h>
#include <common/components/StatsCollector.h>
#include <common/components/streamlistenerv2/ConnAcceptor.h>
#include <common/components/streamlistenerv2/StreamListenerV2.h>
#include <common/components/worker/Worker.h>
#include <common/components/TimerQueue.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/nodes/NodeCapacityPools.h>
#include <common/nodes/NodeStoreClients.h>
#include <common/nodes/NodeStoreServers.h>
#include <common/nodes/RootInfo.h>
#include <common/nodes/TargetCapacityPools.h>
#include <common/nodes/TargetMapper.h>
#include <common/nodes/TargetStateStore.h>
#include <common/storage/quota/ExceededQuotaPerTarget.h>
#include <common/toolkit/AcknowledgmentStore.h>
#include <common/toolkit/NetFilter.h>
#include <components/DatagramListener.h>
#include <components/FileEventLogger.h>
#include <components/InternodeSyncer.h>
#include <components/buddyresyncer/BuddyResyncer.h>
#include <net/message/NetMessageFactory.h>
#include <nodes/MetaNodeOpStats.h>
#include <session/SessionStore.h>
#include <storage/DirInode.h>
#include <storage/MetaStore.h>
#include <storage/SyncedDiskAccessPath.h>
#ifndef BEEGFS_VERSION
#error BEEGFS_VERSION undefined
#endif
// program return codes
#define APPCODE_NO_ERROR 0
#define APPCODE_INVALID_CONFIG 1
#define APPCODE_INITIALIZATION_ERROR 2
#define APPCODE_RUNTIME_ERROR 3
typedef std::list<Worker*> WorkerList;
typedef WorkerList::iterator WorkerListIter;
typedef std::vector<StreamListenerV2*> StreamLisVec;
typedef StreamLisVec::iterator StreamLisVecIter;
// forward declarations
class LogContext;
class ModificationEventFlusher;
/**
 * Application object of the metadata service.
 *
 * Keeps the configuration, the node stores and mappers, the metadata and session stores, the
 * work queues and the component objects as members and hands them out through the accessors
 * in the second public section.
 */
class App : public AbstractApp
{
   public:
      App(int argc, char** argv);
      virtual ~App();

      void run() override;

      void stopComponents() override;
      void handleComponentException(std::exception& e) override;
      void handleNetworkInterfaceFailure(const std::string& devname) override;

      void handleNetworkInterfacesChanged(NicAddressList nicList);

   private:
      int appResult;
      int argc;
      char** argv;

      Config* cfg;
      LogContext* log;
      std::list<std::string> allowedInterfaces;

      LockFD pidFileLockFD;
      LockFD workingDirLockFD;

      NetFilter* netFilter; // empty filter means "all nets allowed"
      NetFilter* tcpOnlyFilter; // for IPs that allow only plain TCP (no RDMA etc)
      std::shared_ptr<Node> localNode;

      NodeStoreServers* mgmtNodes;
      NodeStoreServers* metaNodes;
      NodeStoreServers* storageNodes;
      NodeStoreClients* clientNodes;

      RootInfo metaRoot;

      NodeCapacityPools* metaCapacityPools;
      NodeCapacityPools* metaBuddyCapacityPools;

      TargetMapper* targetMapper;
      MirrorBuddyGroupMapper* storageBuddyGroupMapper; // maps storage targets to buddy groups
      MirrorBuddyGroupMapper* metaBuddyGroupMapper; // maps meta nodes to buddy groups
      TargetStateStore* targetStateStore; // map storage targets to a state
      TargetStateStore* metaStateStore; // map mds targets (i.e. nodeIDs) to a state
      std::unique_ptr<StoragePoolStore> storagePoolStore; // stores (category) storage pools

      MultiWorkQueue* workQueue;
      MultiWorkQueue* commSlaveQueue;
      NetMessageFactory* netMessageFactory;
      MetaStore* metaStore;

      DirInode* rootDir;
      bool isRootBuddyMirrored;
      DirInode* disposalDir;
      DirInode* buddyMirrorDisposalDir;

      SessionStore* sessions;
      SessionStore* mirroredSessions;
      AcknowledgmentStore* ackStore;
      MetaNodeOpStats* nodeOperationStats; // file system operation statistics

      std::string metaPathStr; // the general parent directory for all saved data

      Path* inodesPath; // contains the actual file/directory metadata
      Path* dentriesPath; // contains the file/directory structural links
      Path* buddyMirrorInodesPath; // contains the inodes for buddy mirrored inodes
      Path* buddyMirrorDentriesPath; // contains the dentries for buddy mirrored dentries

      DatagramListener* dgramListener;
      ConnAcceptor* connAcceptor;
      StatsCollector* statsCollector;
      InternodeSyncer* internodeSyncer;
      ModificationEventFlusher* modificationEventFlusher;
      TimerQueue* timerQueue;
      TimerQueue* gcQueue;

      unsigned numStreamListeners; // value copied from cfg (for performance)
      StreamLisVec streamLisVec;

      WorkerList workerList;
      WorkerList commSlaveList; // used by workers for parallel comm tasks

      BuddyResyncer* buddyResyncer;

      ExceededQuotaPerTarget exceededQuotaStores;

      // starts out empty; destroyFileEventLogger is installed as the deleter
      std::unique_ptr<FileEventLogger, decltype(&destroyFileEventLogger)> fileEventLogger { nullptr, &destroyFileEventLogger };

      unsigned nextNumaBindTarget; // the numa node to which we will bind the next component thread

      void runNormal();

      void streamListenersInit();
      void streamListenersStart();
      void streamListenersStop();
      void streamListenersDelete();
      void streamListenersJoin();

      void workersInit();
      void workersStart();
      void workersStop();
      void workersDelete();
      void workersJoin();

      void commSlavesInit();
      void commSlavesStart();
      void commSlavesStop();
      void commSlavesDelete();
      void commSlavesJoin();

      void initLogging();
      void initDataObjects();
      void initBasicNetwork();
      void initLocalNodeIDs(NumNodeID& outLocalNumID);
      void initLocalNode(NumNodeID localNodeNumID);
      void initLocalNodeNumIDFile(NumNodeID localNodeNumID);
      bool preinitStorage();
      void checkTargetUUID();
      void initStorage();
      void initXAttrLimit();
      void initRootDir(NumNodeID localNodeNumID);
      void initDisposalDir();
      void initComponents(TargetConsistencyState initialConsistencyState);

      void startComponents();
      void joinComponents();

      bool waitForMgmtNode();
      bool preregisterNode(NumNodeID& outLocalNodeNumID);
      bool downloadMgmtInfo(TargetConsistencyState& outInitialConsistencyState);

      void logInfos();

      void daemonize();

      void registerSignalHandler();
      static void signalHandler(int sig);

      bool restoreSessions();
      bool storeSessions();
      bool deleteSessionFiles();

   public:
      // inliners

      /**
       * Selects the server node store (meta, storage or mgmt) for the given node type.
       *
       * @return NULL for any other node type
       */
      NodeStoreServers* getServerStoreFromType(NodeType nodeType) const
      {
         if (nodeType == NODETYPE_Meta)
            return metaNodes;

         if (nodeType == NODETYPE_Storage)
            return storageNodes;

         if (nodeType == NODETYPE_Mgmt)
            return mgmtNodes;

         return nullptr;
      }

      /**
       * Selects the node store (meta, storage, client or mgmt) for the given node type.
       *
       * @return NULL for any other node type
       */
      AbstractNodeStore* getAbstractNodeStoreFromType(NodeType nodeType) const
      {
         if (nodeType == NODETYPE_Meta)
            return metaNodes;

         if (nodeType == NODETYPE_Storage)
            return storageNodes;

         if (nodeType == NODETYPE_Client)
            return clientNodes;

         if (nodeType == NODETYPE_Mgmt)
            return mgmtNodes;

         return nullptr;
      }

      /**
       * Get one of the available stream listeners based on the socket file descriptor number.
       * This is to load-balance the sockets over all available stream listeners and ensure that
       * sockets are not bouncing between different stream listeners.
       *
       * Note that IB connections eat two fd numbers, so 2 and multiples of 2 might not be a good
       * value for number of stream listeners.
       */
      StreamListenerV2* getStreamListenerByFD(int fd) override
      {
         const unsigned listenerIdx = fd % numStreamListeners;

         return streamLisVec[listenerIdx];
      }

      // getters & setters

      const ICommonConfig* getCommonConfig() const override { return cfg; }
      const NetFilter* getNetFilter() const override { return netFilter; }
      const NetFilter* getTcpOnlyFilter() const override { return tcpOnlyFilter; }

      const AbstractNetMessageFactory* getNetMessageFactory() const override
      {
         return netMessageFactory;
      }

      AcknowledgmentStore* getAckStore() const { return ackStore; }
      Config* getConfig() const { return cfg; }

      void updateLocalNicList(NicAddressList& localNicList);

      /*
       * this is just a convenience wrapper for now; old code used to have the localNodeNumID as a
       * member of App, but localNodeNumID and the numID in localNode are duplicates
       */
      NumNodeID getLocalNodeNumID() const { return localNode->getNumID(); }

      Node& getLocalNode() const { return *localNode; }

      NodeStoreServers* getMgmtNodes() const { return mgmtNodes; }
      NodeStoreServers* getMetaNodes() const { return metaNodes; }
      NodeStoreServers* getStorageNodes() const { return storageNodes; }
      NodeStoreClients* getClientNodes() const { return clientNodes; }

      NodeCapacityPools* getMetaCapacityPools() const { return metaCapacityPools; }

      TargetMapper* getTargetMapper() const { return targetMapper; }

      MirrorBuddyGroupMapper* getStorageBuddyGroupMapper() const
      {
         return storageBuddyGroupMapper;
      }

      MirrorBuddyGroupMapper* getMetaBuddyGroupMapper() const
      {
         return metaBuddyGroupMapper;
      }

      TargetStateStore* getTargetStateStore() const { return targetStateStore; }
      TargetStateStore* getMetaStateStore() const { return metaStateStore; }

      NodeCapacityPools* getMetaBuddyCapacityPools() const { return metaBuddyCapacityPools; }

      MultiWorkQueue* getWorkQueue() const { return workQueue; }
      MultiWorkQueue* getCommSlaveQueue() const { return commSlaveQueue; }

      MetaStore* getMetaStore() const { return metaStore; }

      DirInode* getRootDir() const { return rootDir; }
      DirInode* getDisposalDir() const { return disposalDir; }
      DirInode* getBuddyMirrorDisposalDir() const { return buddyMirrorDisposalDir; }

      SessionStore* getSessions() const { return sessions; }
      SessionStore* getMirroredSessions() const { return mirroredSessions; }

      // returns a copy of the metadata base path string
      std::string getMetaPath() const { return metaPathStr; }

      MetaNodeOpStats* getNodeOpStats() const { return nodeOperationStats; }

      const Path* getInodesPath() const { return inodesPath; }
      const Path* getDentriesPath() const { return dentriesPath; }
      const Path* getBuddyMirrorInodesPath() const { return buddyMirrorInodesPath; }
      const Path* getBuddyMirrorDentriesPath() const { return buddyMirrorDentriesPath; }

      DatagramListener* getDatagramListener() const { return dgramListener; }

      const StreamLisVec* getStreamListenerVec() const { return &streamLisVec; }

      StatsCollector* getStatsCollector() const { return statsCollector; }
      InternodeSyncer* getInternodeSyncer() const { return internodeSyncer; }

      TimerQueue* getTimerQueue() const { return timerQueue; }
      TimerQueue* getGcQueue() const { return gcQueue; }

      ModificationEventFlusher* getModificationEventFlusher() const
      {
         return modificationEventFlusher;
      }

      WorkerList* getWorkers() { return &workerList; }

      BuddyResyncer* getBuddyResyncer() { return buddyResyncer; }

      int getAppResult() const { return appResult; }

      const ExceededQuotaPerTarget* getExceededQuotaStores() const
      {
         return &exceededQuotaStores;
      }

      StoragePoolStore* getStoragePoolStore() const { return storagePoolStore.get(); }

      // may return a null pointer, since the logger member starts out empty
      FileEventLogger* getFileEventLogger() { return fileEventLogger.get(); }

      const RootInfo& getMetaRoot() const { return metaRoot; }
      RootInfo& getMetaRoot() { return metaRoot; }

      void findAllowedInterfaces(NicAddressList& outList) const;
      void findAllowedRDMAInterfaces(NicAddressList& outList) const;
};

View File

@@ -0,0 +1,326 @@
#include <common/nodes/TargetCapacityPools.h>
#include <common/system/System.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/UnitTk.h>
#include "Config.h"
// Default path of the meta config file.
// NOTE(review): not referenced in the visible part of this file (loadDefaults() sets "cfgFile"
// to an empty string) - confirm where it is used.
#define CONFIG_DEFAULT_CFGFILENAME "/etc/beegfs/beegfs-meta.conf"
// String names of the target chooser types; TARGETCHOOSERTYPE_RANDOMIZED_STR is the default
// value of "tuneTargetChooser" (see loadDefaults()).
#define TARGETCHOOSERTYPE_RANDOMIZED_STR "randomized"
#define TARGETCHOOSERTYPE_ROUNDROBIN_STR "roundrobin"
#define TARGETCHOOSERTYPE_RANDOMROBIN_STR "randomrobin"
#define TARGETCHOOSERTYPE_RANDOMINTERNODE_STR "randominternode"
#define TARGETCHOOSERTYPE_RANDOMINTRANODE_STR "randomintranode"
/**
 * Builds the meta service configuration from the command line arguments: the target
 * attachment map starts out unset, afterwards the config is set up via initConfig().
 *
 * @param argc number of command line arguments (forwarded to AbstractConfig and initConfig)
 * @param argv command line arguments (forwarded to AbstractConfig and initConfig)
 */
Config::Config(int argc, char** argv)
   : AbstractConfig(argc, argv)
{
   this->sysTargetAttachmentMap = nullptr;

   this->initConfig(argc, argv, true);
}
/**
 * Releases the target attachment map.
 */
Config::~Config()
{
// SAFE_DELETE is defined elsewhere; presumably it deletes the object and resets the pointer
// - TODO confirm against the macro definition.
SAFE_DELETE(sysTargetAttachmentMap);
}
/**
 * Fills the configMap with the default value of each configurable of the meta service.
 *
 * The defaults of AbstractConfig are loaded first; afterwards every entry of the table below
 * is passed to configMapRedefine() in the listed order.
 *
 * @param addDashes currently unused
 */
void Config::loadDefaults(bool addDashes)
{
   struct DefaultEntry
   {
      const char* key;
      const char* value;
   };

   static const DefaultEntry metaDefaults[] =
   {
      // re-definitions
      { "cfgFile", "" },
      { "connMaxInternodeNum", "16" },

      // own definitions
      { "connInterfacesFile", "" },
      { "connInterfacesList", "" },

      { "storeMetaDirectory", "" },
      { "storeFsUUID", "" },
      { "storeAllowFirstRunInit", "true" },
      { "storeUseExtendedAttribs", "true" },
      { "storeSelfHealEmptyFiles", "true" },
      { "storeClientXAttrs", "false" },
      { "storeClientACLs", "false" },

      { "sysTargetAttachmentFile", "" },

      { "tuneNumStreamListeners", "1" },
      { "tuneNumWorkers", "0" },
      { "tuneWorkerBufSize", "1m" },
      { "tuneNumCommSlaves", "0" },
      { "tuneCommSlaveBufSize", "1m" },
      { "tuneDefaultChunkSize", "512k" },
      { "tuneDefaultNumStripeTargets", "4" },
      { "tuneProcessFDLimit", "50000" },
      { "tuneWorkerNumaAffinity", "false" },
      { "tuneListenerNumaAffinity", "false" },
      { "tuneBindToNumaZone", "" },
      { "tuneListenerPrioShift", "-1" },
      { "tuneDirMetadataCacheLimit", "1024" },
      { "tuneTargetChooser", TARGETCHOOSERTYPE_RANDOMIZED_STR },
      { "tuneLockGrantWaitMS", "333" },
      { "tuneLockGrantNumRetries", "15" },
      { "tuneRotateMirrorTargets", "false" },
      { "tuneEarlyUnlinkResponse", "true" },
      { "tuneUsePerUserMsgQueues", "false" },
      { "tuneUseAggressiveStreamPoll", "false" },
      { "tuneNumResyncSlaves", "12" },
      { "tuneMirrorTimestamps", "true" },
      { "tuneDisposalGCPeriod", "0" },

      { "quotaEarlyChownResponse", "true" },
      { "quotaEnableEnforcement", "false" },

      { "sysTargetOfflineTimeoutSecs", "180" },
      { "sysAllowUserSetPattern", "false" },
      { "sysFileEventLogTarget", "" },
      { "sysFileEventPersistDirectory", "" },
      { "sysFileEventPersistSize", "0" },

      { "runDaemonized", "false" },
      { "pidFile", "" },
   };

   AbstractConfig::loadDefaults();

   for (const DefaultEntry& entry : metaDefaults)
      configMapRedefine(entry.key, entry.value);
}
/**
* @param addDashes currently unused
*/
void Config::applyConfigMap(bool enableException, bool addDashes)
{
AbstractConfig::applyConfigMap(false);
for (StringMapIter iter = configMap.begin(); iter != configMap.end();)
{
bool unknownElement = false;
if (iter->first == std::string("logType"))
{
if (iter->second == "syslog")
{
logType = LogType_SYSLOG;
}
else if (iter->second == "logfile")
{
logType = LogType_LOGFILE;
}
else
{
throw InvalidConfigException("The value of config argument logType is invalid.");
}
}
else if (iter->first == std::string("connInterfacesFile"))
connInterfacesFile = iter->second;
else if (iter->first == std::string("connInterfacesList"))
connInterfacesList = iter->second;
else if (iter->first == std::string("storeMetaDirectory"))
storeMetaDirectory = iter->second;
else if (iter->first == std::string("storeFsUUID"))
storeFsUUID = iter->second;
else if (iter->first == std::string("storeAllowFirstRunInit"))
storeAllowFirstRunInit = StringTk::strToBool(iter->second);
else if (iter->first == std::string("storeUseExtendedAttribs"))
storeUseExtendedAttribs = StringTk::strToBool(iter->second);
else if (iter->first == std::string("storeSelfHealEmptyFiles"))
storeSelfHealEmptyFiles = StringTk::strToBool(iter->second);
else if (iter->first == std::string("storeClientXAttrs"))
storeClientXAttrs = StringTk::strToBool(iter->second);
else if (iter->first == std::string("storeClientACLs"))
storeClientACLs = StringTk::strToBool(iter->second);
else if (iter->first == std::string("sysTargetAttachmentFile"))
sysTargetAttachmentFile = iter->second;
else if (iter->first == std::string("tuneNumStreamListeners"))
tuneNumStreamListeners = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneNumWorkers"))
tuneNumWorkers = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneWorkerBufSize"))
tuneWorkerBufSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("tuneNumCommSlaves"))
tuneNumCommSlaves = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneCommSlaveBufSize"))
tuneCommSlaveBufSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("tuneDefaultChunkSize"))
tuneDefaultChunkSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("tuneDefaultNumStripeNodes")) // old "...Nodes" kept for compat
tuneDefaultNumStripeTargets = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneDefaultNumStripeTargets"))
tuneDefaultNumStripeTargets = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneProcessFDLimit"))
tuneProcessFDLimit = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneWorkerNumaAffinity"))
tuneWorkerNumaAffinity = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneListenerNumaAffinity"))
tuneListenerNumaAffinity = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneBindToNumaZone"))
{
if (iter->second.empty()) // not defined => disable
tuneBindToNumaZone = -1; // -1 means disable binding
else
tuneBindToNumaZone = StringTk::strToInt(iter->second);
}
else if (iter->first == std::string("tuneListenerPrioShift"))
tuneListenerPrioShift = StringTk::strToInt(iter->second);
else if (iter->first == std::string("tuneDirMetadataCacheLimit"))
tuneDirMetadataCacheLimit = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneTargetChooser"))
tuneTargetChooser = iter->second;
else if (iter->first == std::string("tuneLockGrantWaitMS"))
tuneLockGrantWaitMS = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneLockGrantNumRetries"))
tuneLockGrantNumRetries = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneRotateMirrorTargets"))
tuneRotateMirrorTargets = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneEarlyUnlinkResponse"))
tuneEarlyUnlinkResponse = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneUseAggressiveStreamPoll"))
tuneUseAggressiveStreamPoll = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneNumResyncSlaves"))
this->tuneNumResyncSlaves = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("quotaEarlyChownResponse"))
quotaEarlyChownResponse = StringTk::strToBool(iter->second);
else if (iter->first == std::string("quotaEnableEnforcement"))
quotaEnableEnforcement = StringTk::strToBool(iter->second);
else if (iter->first == std::string("sysTargetOfflineTimeoutSecs"))
{
sysTargetOfflineTimeoutSecs = StringTk::strToUInt(iter->second);
if (sysTargetOfflineTimeoutSecs < 30)
{
throw InvalidConfigException("Invalid sysTargetOfflineTimeoutSecs value "
+ iter->second + " (must be at least 30)");
}
}
else if (iter->first == std::string("sysAllowUserSetPattern"))
sysAllowUserSetPattern = StringTk::strToBool(iter->second.c_str());
else if (iter->first == std::string("tuneUsePerUserMsgQueues"))
tuneUsePerUserMsgQueues = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneMirrorTimestamps"))
tuneMirrorTimestamps = StringTk::strToBool(iter->second);
else if(iter->first == std::string("tuneDisposalGCPeriod"))
tuneDisposalGCPeriod = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("sysFileEventLogTarget"))
sysFileEventLogTarget = iter->second;
else if (iter->first == std::string("sysFileEventPersistDirectory"))
sysFileEventPersistDirectory = iter->second;
else if (iter->first == std::string("sysFileEventPersistSize"))
sysFileEventPersistSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("runDaemonized"))
runDaemonized = StringTk::strToBool(iter->second);
else if (iter->first == std::string("pidFile"))
pidFile = iter->second;
else
{
// unknown element occurred
unknownElement = true;
if (enableException)
{
throw InvalidConfigException("The config argument '" + iter->first + "' is invalid");
}
}
if (unknownElement)
{
// just skip the unknown element
iter++;
}
else
{
// remove this element from the map
iter = eraseFromConfigMap(iter);
}
}
}
/**
 * Computes all values that are derived from other settings instead of being read directly
 * from the config map.
 */
void Config::initImplicitVals()
{
   // tuneNumWorkers: 0 selects the automatic value (two per online cpu core, but at least 4)
   if (tuneNumWorkers == 0)
      tuneNumWorkers = BEEGFS_MAX(System::getNumOnlineCPUs()*2, 4);

   // tuneNumCommSlaves: 0 selects the automatic value (twice the number of workers)
   if (tuneNumCommSlaves == 0)
      tuneNumCommSlaves = tuneNumWorkers * 2;

   // tuneTargetChooserNum (enum value for the tuneTargetChooser string)
   initTuneTargetChooserNum();

   // connInterfacesList(/File) and socket buffer sizes
   AbstractConfig::initInterfacesList(connInterfacesFile, connInterfacesList);
   AbstractConfig::initSocketBufferSizes();

   // connAuthHash
   AbstractConfig::initConnAuthHash(connAuthFile, &connAuthHash);

   // sysTargetAttachmentMap (loaded from sysTargetAttachmentFile, if one is set)
   initSysTargetAttachmentMap();
}
void Config::initSysTargetAttachmentMap()
{
if(sysTargetAttachmentFile.empty() )
return; // no file given => nothing to do here
// check if file exists
if(!StorageTk::pathExists(sysTargetAttachmentFile) )
throw InvalidConfigException("sysTargetAttachmentFile not found: " +
sysTargetAttachmentFile);
// load as string map
StringMap attachmentStrMap;
MapTk::loadStringMapFromFile(sysTargetAttachmentFile.c_str(), &attachmentStrMap);
// convert from string map to target map
sysTargetAttachmentMap = new TargetMap();
for(StringMapCIter iter = attachmentStrMap.begin(); iter != attachmentStrMap.end(); iter++)
{
(*sysTargetAttachmentMap)[StringTk::strToUInt(iter->first)] =
NumNodeID(StringTk::strToUInt(iter->second) );
}
}
void Config::initTuneTargetChooserNum()
{
if (this->tuneTargetChooser == TARGETCHOOSERTYPE_RANDOMIZED_STR)
this->tuneTargetChooserNum = TargetChooserType_RANDOMIZED;
else if (this->tuneTargetChooser == TARGETCHOOSERTYPE_ROUNDROBIN_STR)
this->tuneTargetChooserNum = TargetChooserType_ROUNDROBIN;
else if (this->tuneTargetChooser == TARGETCHOOSERTYPE_RANDOMROBIN_STR)
this->tuneTargetChooserNum = TargetChooserType_RANDOMROBIN;
else if (this->tuneTargetChooser == TARGETCHOOSERTYPE_RANDOMINTERNODE_STR)
this->tuneTargetChooserNum = TargetChooserType_RANDOMINTERNODE;
// Don't allow RANDOMINTRANODE Target Chooser
else
{
// invalid chooser specified
throw InvalidConfigException("Invalid storage target chooser specified: "
+ tuneTargetChooser);
}
}
/**
 * @return CONFIG_DEFAULT_CFGFILENAME if stat() succeeds on that path and it is a regular file,
 *    an empty string otherwise
 */
std::string Config::createDefaultCfgFilename() const
{
   struct stat cfgFileStat;

   if (stat(CONFIG_DEFAULT_CFGFILENAME, &cfgFileStat) != 0)
      return ""; // stat failed => no default file

   if (!S_ISREG(cfgFileStat.st_mode) )
      return ""; // path exists, but is not a regular file

   return CONFIG_DEFAULT_CFGFILENAME; // there appears to be a config file
}

View File

@@ -0,0 +1,294 @@
#pragma once
#include <common/app/config/AbstractConfig.h>
#include <common/nodes/TargetCapacityPools.h>
/**
 * Numeric form of the tuneTargetChooser config string (see Config::tuneTargetChooserNum).
 */
enum TargetChooserType
{
TargetChooserType_RANDOMIZED = 0,
TargetChooserType_ROUNDROBIN = 1, // round-robin in ID order
TargetChooserType_RANDOMROBIN = 2, // randomized round-robin (round-robin, but shuffle result)
TargetChooserType_RANDOMINTERNODE = 3, // select random targets from different nodes/domains
TargetChooserType_RANDOMINTRANODE = 4, // select random targets from the same node/domain
};
class Config : public AbstractConfig
{
public:
Config(int argc, char** argv);
virtual ~Config();
private:
// configurables
std::string connInterfacesFile; // implicitly generates connInterfacesList
std::string connInterfacesList; // comma-separated list
std::string storeMetaDirectory;
std::string storeFsUUID;
bool storeAllowFirstRunInit;
bool storeUseExtendedAttribs;
bool storeSelfHealEmptyFiles;
bool storeClientXAttrs;
bool storeClientACLs;
std::string sysTargetAttachmentFile; // used by randominternode target chooser
TargetMap* sysTargetAttachmentMap; /* implicitly by sysTargetAttachmentFile, NULL if
unset */
unsigned tuneNumStreamListeners;
unsigned tuneNumWorkers; // 0 means automatic
unsigned tuneWorkerBufSize;
unsigned tuneNumCommSlaves; // 0 means automatic
unsigned tuneCommSlaveBufSize;
unsigned tuneDefaultChunkSize;
unsigned tuneDefaultNumStripeTargets;
unsigned tuneProcessFDLimit; // 0 means "don't touch limit"
bool tuneWorkerNumaAffinity;
bool tuneListenerNumaAffinity;
int tuneBindToNumaZone; // bind all threads to this zone, -1 means no binding
int tuneListenerPrioShift; // inc/dec thread priority of listener components
unsigned tuneDirMetadataCacheLimit;
std::string tuneTargetChooser;
TargetChooserType tuneTargetChooserNum; // auto-generated based on tuneTargetChooser
unsigned tuneLockGrantWaitMS; // time to wait for an ack per retry
unsigned tuneLockGrantNumRetries; // number of lock grant send retries until ack recv
bool tuneRotateMirrorTargets; // true to use rotated targets list as mirrors
bool tuneEarlyUnlinkResponse; // true to send response before chunk files unlink
bool tuneUsePerUserMsgQueues; // true to use UserWorkContainer for MultiWorkQueue
bool tuneUseAggressiveStreamPoll; // true to not sleep on epoll in streamlisv2
unsigned tuneNumResyncSlaves;
bool tuneMirrorTimestamps;
unsigned tuneDisposalGCPeriod; // sleep between disposal garbage collector runs [seconds], 0 = disabled
bool quotaEarlyChownResponse; // true to send response before chunk files chown
bool quotaEnableEnforcement;
unsigned sysTargetOfflineTimeoutSecs;
bool sysAllowUserSetPattern;
bool runDaemonized;
std::string pidFile;
bool limitXAttrListLength;
std::string sysFileEventLogTarget;
std::string sysFileEventPersistDirectory;
int64_t sysFileEventPersistSize;
// internals
virtual void loadDefaults(bool addDashes) override;
virtual void applyConfigMap(bool enableException, bool addDashes) override;
virtual void initImplicitVals() override;
void initSysTargetAttachmentMap();
void initTuneTargetChooserNum();
std::string createDefaultCfgFilename() const;
public:
// getters & setters
const std::string& getConnInterfacesList() const
{
return connInterfacesList;
}
const std::string& getStoreMetaDirectory() const
{
return storeMetaDirectory;
}
const std::string& getStoreFsUUID() const
{
return storeFsUUID;
}
bool getStoreAllowFirstRunInit() const
{
return storeAllowFirstRunInit;
}
bool getStoreUseExtendedAttribs() const
{
return storeUseExtendedAttribs;
}
bool getStoreSelfHealEmptyFiles() const
{
return storeSelfHealEmptyFiles;
}
bool getStoreClientXAttrs() const
{
return storeClientXAttrs;
}
bool getStoreClientACLs() const
{
return storeClientACLs;
}
const std::string& getSysTargetAttachmentFile() const
{
return sysTargetAttachmentFile;
}
const TargetMap* getSysTargetAttachmentMap() const
{
return sysTargetAttachmentMap;
}
unsigned getTuneNumStreamListeners() const
{
return tuneNumStreamListeners;
}
unsigned getTuneNumWorkers() const
{
return tuneNumWorkers;
}
unsigned getTuneWorkerBufSize() const
{
return tuneWorkerBufSize;
}
unsigned getTuneNumCommSlaves() const
{
return tuneNumCommSlaves;
}
unsigned getTuneCommSlaveBufSize() const
{
return tuneCommSlaveBufSize;
}
unsigned getTuneDefaultChunkSize() const
{
return tuneDefaultChunkSize;
}
unsigned getTuneDefaultNumStripeTargets() const
{
return tuneDefaultNumStripeTargets;
}
unsigned getTuneProcessFDLimit() const
{
return tuneProcessFDLimit;
}
bool getTuneWorkerNumaAffinity() const
{
return tuneWorkerNumaAffinity;
}
bool getTuneListenerNumaAffinity() const
{
return tuneListenerNumaAffinity;
}
int getTuneBindToNumaZone() const
{
return tuneBindToNumaZone;
}
int getTuneListenerPrioShift() const
{
return tuneListenerPrioShift;
}
unsigned getTuneDirMetadataCacheLimit() const
{
return tuneDirMetadataCacheLimit;
}
TargetChooserType getTuneTargetChooserNum() const
{
return tuneTargetChooserNum;
}
unsigned getTuneLockGrantWaitMS() const
{
return tuneLockGrantWaitMS;
}
unsigned getTuneLockGrantNumRetries() const
{
return tuneLockGrantNumRetries;
}
bool getTuneRotateMirrorTargets() const
{
return tuneRotateMirrorTargets;
}
bool getTuneEarlyUnlinkResponse() const
{
return tuneEarlyUnlinkResponse;
}
bool getTuneUsePerUserMsgQueues() const
{
return tuneUsePerUserMsgQueues;
}
bool getTuneUseAggressiveStreamPoll() const
{
return tuneUseAggressiveStreamPoll;
}
unsigned getTuneNumResyncSlaves() const
{
return tuneNumResyncSlaves;
}
bool getQuotaEarlyChownResponse() const
{
return quotaEarlyChownResponse;
}
bool getQuotaEnableEnforcement() const
{
return quotaEnableEnforcement;
}
void setQuotaEnableEnforcement(bool doQuotaEnforcement)
{
quotaEnableEnforcement = doQuotaEnforcement;
}
unsigned getSysTargetOfflineTimeoutSecs() const
{
return sysTargetOfflineTimeoutSecs;
}
bool getRunDaemonized() const
{
return runDaemonized;
}
const std::string& getPIDFile() const
{
return pidFile;
}
bool getTuneMirrorTimestamps() const { return tuneMirrorTimestamps; }
unsigned getTuneDisposalGCPeriod() const { return tuneDisposalGCPeriod; }
bool getSysAllowUserSetPattern() const { return sysAllowUserSetPattern; }
bool getLimitXAttrListLength() const { return limitXAttrListLength; }
void setLimitXAttrListLength(bool value) { limitXAttrListLength = value; }
const std::string& getFileEventLogTarget() const { return sysFileEventLogTarget; }
const std::string& getFileEventPersistDirectory() const { return sysFileEventPersistDirectory; }
uint64_t getFileEventPersistSize() const { return sysFileEventPersistSize; }
};

View File

@@ -0,0 +1,57 @@
#include "DatagramListener.h"
#include <common/net/message/NetMessageTypes.h>
/**
 * Forwards all arguments to AbstractDatagramListener, using "DGramLis" as the component name.
 */
DatagramListener::DatagramListener(
   NetFilter* netFilter,
   NicAddressList& localNicList,
   AcknowledgmentStore* ackStore,
   unsigned short udpPort,
   bool restrictOutboundInterfaces)
   : AbstractDatagramListener(
        "DGramLis", netFilter, localNicList, ackStore, udpPort, restrictOutboundInterfaces)
{
   // nothing to do beyond the base class initialization
}
void DatagramListener::handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg)
{
HighResolutionStats stats; // currently ignored
std::shared_ptr<StandardSocket> sock = findSenderSock(fromAddr->sin_addr);
if (sock == nullptr)
{
log.log(Log_WARNING, "Could not handle incoming message: no socket");
return;
}
NetMessage::ResponseContext rctx(fromAddr, sock.get(), sendBuf, DGRAMMGR_SENDBUF_SIZE, &stats);
const auto messageType = netMessageTypeToStr(msg->getMsgType());
switch(msg->getMsgType() )
{
// valid messages within this context
case NETMSGTYPE_Ack:
case NETMSGTYPE_Dummy:
case NETMSGTYPE_HeartbeatRequest:
case NETMSGTYPE_Heartbeat:
case NETMSGTYPE_MapTargets:
case NETMSGTYPE_PublishCapacities:
case NETMSGTYPE_RemoveNode:
case NETMSGTYPE_RefreshCapacityPools:
case NETMSGTYPE_RefreshStoragePools:
case NETMSGTYPE_RefreshTargetStates:
case NETMSGTYPE_SetMirrorBuddyGroup:
{
if(!msg->processIncoming(rctx) )
{
LOG(GENERAL, WARNING,
"Problem encountered during handling of incoming message.", messageType);
}
} break;
default:
{ // valid, but not within this context
log.logErr(
"Received a message that is invalid within the current context "
"from: " + Socket::ipaddrToStr(fromAddr->sin_addr) + "; "
"type: " + messageType );
} break;
};
}

View File

@@ -0,0 +1,18 @@
#pragma once
#include <common/components/AbstractDatagramListener.h>
/**
 * Datagram listener component: an AbstractDatagramListener that supplies its own
 * handleIncomingMsg() implementation.
 */
class DatagramListener : public AbstractDatagramListener
{
   public:
      DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
         AcknowledgmentStore* ackStore, unsigned short udpPort, bool restrictOutboundInterfaces);

   protected:
      virtual void handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg);
};

View File

@@ -0,0 +1,51 @@
#include "DisposalGarbageCollector.h"
#include "app/App.h"
#include "program/Program.h"
#include <common/toolkit/DisposalCleaner.h>
/**
 * Tries to unlink one disposal entry via DisposalCleaner::unlinkFile and logs any failure.
 *
 * @param unlinked counter that is incremented when the unlink succeeded
 * @param owner node that is passed on to DisposalCleaner::unlinkFile
 * @param entryID ID of the entry to unlink
 * @param isMirrored passed on to DisposalCleaner::unlinkFile
 * @return always FhgfsOpsErr_SUCCESS; failures are only logged
 */
FhgfsOpsErr deleteFile(unsigned& unlinked, Node& owner, const std::string& entryID, const bool isMirrored) {
   const auto err = DisposalCleaner::unlinkFile(owner, entryID, isMirrored);

   switch (err) {
      case FhgfsOpsErr_SUCCESS:
         ++unlinked;
         break;

      case FhgfsOpsErr_COMMUNICATION:
         LOG(GENERAL, ERR, "Communication error", entryID, isMirrored);
         break;

      case FhgfsOpsErr_INUSE:
         LOG(GENERAL, ERR, "File in use", entryID, isMirrored);
         break;

      default:
         LOG(GENERAL, ERR, "Error", entryID, isMirrored, err);
         break;
   }

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Error callback handed to DisposalCleaner::run by disposalGarbageCollector(): logs the error.
 * The node argument is not used.
 */
void handleError(Node&, FhgfsOpsErr err) {
LOG(GENERAL, ERR, "Disposal garbage collection run failed", err);
}
/**
 * Runs one disposal garbage collection pass for the local node and, if tuneDisposalGCPeriod is
 * non-zero and a GC queue exists, re-enqueues itself to run again after that many seconds.
 */
void disposalGarbageCollector() {
   LOG(GENERAL, NOTICE, "Disposal garbage collection started");

   auto* const app = Program::getApp();
   unsigned unlinked = 0;

   // per-entry callback: try to unlink the entry and count successes
   auto unlinkEntry = [&unlinked] (auto&& owner, auto&& entryID, auto&& isMirrored) {
      return deleteFile(unlinked, owner, entryID, isMirrored);
   };

   // abort callback: reports the self-terminate flag of the GC queue
   auto shouldAbort = [app] () {
      return app->getGcQueue()->getSelfTerminate();
   };

   // only the local node is handled
   const std::vector<NodeHandle> nodes = {app->getMetaNodes()->referenceNode(app->getLocalNode().getNumID())};

   DisposalCleaner dc(*app->getMetaBuddyGroupMapper(), true);
   dc.run(nodes, unlinkEntry, handleError, shouldAbort);

   LOG(GENERAL, NOTICE, "Disposal garbage collection finished", unlinked);

   // schedule the next run (a period of 0 disables rescheduling)
   const auto wait = app->getConfig()->getTuneDisposalGCPeriod();
   if (!wait)
      return;

   if (auto* queue = app->getGcQueue()) {
      queue->enqueue(std::chrono::seconds(wait), disposalGarbageCollector);
   }
}

View File

@@ -0,0 +1,4 @@
#pragma once
// Entry point of the disposal garbage collector.
// NOTE(review): the definition is not part of this header; presumably it is meant to be
// enqueued as a work item on the app's GC queue - confirm against the caller.
void disposalGarbageCollector();

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,42 @@
#pragma once
#include <common/storage/FileEvent.h>
#include <common/storage/EntryInfo.h>
#include <string>
// Additional context that is passed to logEvent() together with a FileEvent.
struct EventContext
{
// bit values for the eventFlags member
static constexpr uint32_t EVENTFLAG_NONE = 0;
static constexpr uint32_t EVENTFLAG_MIRRORED = (1 << 0); // Event is for a mirrored entry
static constexpr uint32_t EVENTFLAG_SECONDARY = (1 << 1); // Event generated by secondary node
std::string entryId;
std::string parentId;
unsigned msgUserId;
std::string targetParentId;
unsigned linkCount;
int64_t timestamp; // NOTE(review): unit/epoch not visible here - confirm in makeEventContext()
uint32_t eventFlags; // bitwise OR of EVENTFLAG_ values above.
};
// Creates an EventContext from the given values.
// NOTE(review): the definition is not visible here; presumably entryId and the MIRRORED flag are
// taken from entryInfo and the SECONDARY flag from isSecondary - confirm.
EventContext makeEventContext(EntryInfo* entryInfo, std::string parentId, unsigned msgUserId,
std::string targetParentId, unsigned linkCount, bool isSecondary);
// Numeric IDs that are handed to the file event logger as part of FileEventLoggerParams.
// NOTE(review): presumably the IDs of the local node and its buddy group - confirm at call site.
struct FileEventLoggerIds
{
uint32_t nodeId;
uint16_t buddyGroupId;
};
// Parameter bundle for createFileEventLogger().
struct FileEventLoggerParams
{
std::string address; // NOTE(review): presumably the sysFileEventLogTarget value - confirm
FileEventLoggerIds ids;
};
// Opaque logger type; only used through pointers by the functions below.
struct FileEventLogger;
// Creates a logger from the given parameters.
// NOTE(review): presumably the result has to be released with destroyFileEventLogger() - confirm.
FileEventLogger *createFileEventLogger(FileEventLoggerParams const& params);
// Counterpart of createFileEventLogger().
void destroyFileEventLogger(FileEventLogger *logger);
// Logs the given event together with its context through the given logger.
void logEvent(FileEventLogger *logger, FileEvent const& event, EventContext const& eventCtx);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,140 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/components/ComponentInitException.h>
#include <common/net/message/nodes/GetNodeCapacityPoolsMsg.h>
#include <common/net/message/nodes/GetNodeCapacityPoolsRespMsg.h>
#include <common/nodes/NodeStoreServers.h>
#include <common/storage/quota/QuotaData.h>
#include <common/threading/PThread.h>
#include <common/Common.h>
#include <storage/NodeOfflineWait.h>
#include <atomic>
#include <mutex>
class AbstractDatagramListener;
/**
 * Thread component (PThread) that also bundles static helpers for downloading and syncing
 * nodes, target mappings, storage pools, target states, buddy groups, capacity pools and
 * exceeded-quota lists.
 *
 * Other components can request work for the next sync loop iteration through the
 * setForce...() methods; the requests are stored in atomic flags.
 */
class InternodeSyncer : public PThread
{
   public:
      InternodeSyncer(TargetConsistencyState initialConsistencyState);
      virtual ~InternodeSyncer() { }

      static bool registerNode(AbstractDatagramListener* dgramLis);

      static bool updateMetaStatesAndBuddyGroups(TargetConsistencyState& outConsistencyState,
         bool publish);
      static void syncClients(const std::vector<NodeHandle>& clientsList, bool allowRemoteComm);
      static bool downloadAndSyncNodes();
      static bool downloadAndSyncTargetMappings();
      static bool downloadAndSyncStoragePools();
      static bool downloadAndSyncTargetStatesAndBuddyGroups();

      static void downloadAndSyncClients(bool requeue);

      static bool updateMetaCapacityPools();
      static bool updateMetaBuddyCapacityPools();

      static bool downloadAllExceededQuotaLists(const StoragePoolPtrVec& storagePools);
      static bool downloadExceededQuotaList(StoragePoolId storagePoolId, QuotaDataType idType,
         QuotaLimitType exType, UIntList* outIDList, FhgfsOpsErr& error);

      static void printSyncResults(NodeType nodeType, NumNodeIDList* addedNodes,
         NumNodeIDList* removedNodes);

   private:
      LogContext log;

      // "#warning" (not "#warn", which is no preprocessing directive) so that a platform
      // without always-lock-free atomic<bool> produces the intended diagnostic.
#if ATOMIC_BOOL_LOCK_FREE != 2
# warning atomic<bool> is not always lock-free
#endif
      std::atomic<bool> forcePoolsUpdate; // true to force update of capacity pools
      std::atomic<bool> forceTargetStatesUpdate; // true to force update of node state
      std::atomic<bool> forcePublishCapacities; // true to force publishing free capacity
      std::atomic<bool> forceStoragePoolsUpdate; // true to force update of storage pools
      std::atomic<bool> forceCheckNetwork; // true to force checking of network changes

      // Keeps track of the timeout during which the node may not send state reports because it is
      // waiting to be offlined by the mgmtd.
      NodeOfflineWait offlineWait;

      Mutex nodeConsistencyStateMutex; // guards nodeConsistencyState
      TargetConsistencyState nodeConsistencyState; // Node's own consistency state.
      // Note: This is initialized when updateMetaStates... is called from App::downloadMgmtInfo.
      AtomicUInt32 buddyResyncInProgress;

      virtual void run();
      void syncLoop();

      static bool updateStorageCapacityPools();
      static bool updateTargetBuddyCapacityPools();
      static std::pair<bool, GetNodeCapacityPoolsRespMsg::PoolsMap> downloadCapacityPools(
         CapacityPoolQueryType poolType);
      void publishNodeCapacity();

      void forceMgmtdPoolsRefresh();

      // returns true if the local interfaces have changed
      bool checkNetwork();
      void dropIdleConns();
      unsigned dropIdleConnsByStore(NodeStoreServers* nodes);

      void getStatInfo(int64_t* outSizeTotal, int64_t* outSizeFree, int64_t* outInodesTotal,
         int64_t* outInodesFree);

      static TargetConsistencyState decideResync(const CombinedTargetState newState);
      static bool publishNodeStateChange(const TargetConsistencyState oldState,
         const TargetConsistencyState newState);

      static bool downloadAllExceededQuotaLists(const StoragePoolPtr storagePool);

   public:
      // inliners

      void setForcePoolsUpdate()
      {
         forcePoolsUpdate = true;
      }

      void setForceTargetStatesUpdate()
      {
         forceTargetStatesUpdate = true;
      }

      void setForcePublishCapacities()
      {
         forcePublishCapacities = true;
      }

      void setForceStoragePoolsUpdate()
      {
         forceStoragePoolsUpdate = true;
      }

      void setForceCheckNetwork()
      {
         forceCheckNetwork = true;
      }

      /**
       * @return the node's own consistency state (read under nodeConsistencyStateMutex)
       */
      TargetConsistencyState getNodeConsistencyState()
      {
         std::lock_guard<Mutex> lock(nodeConsistencyStateMutex);
         return nodeConsistencyState;
      }

      /**
       * @param newState new value of the node's own consistency state (written under
       *    nodeConsistencyStateMutex)
       */
      void setNodeConsistencyState(TargetConsistencyState newState)
      {
         std::lock_guard<Mutex> lock(nodeConsistencyStateMutex);
         nodeConsistencyState = newState;
      }

      void setResyncInProgress(bool resyncInProgress)
      {
         this->buddyResyncInProgress.set(resyncInProgress);
      }

      bool getResyncInProgress()
      {
         return this->buddyResyncInProgress.read();
      }
};

View File

@@ -0,0 +1,145 @@
#include "ModificationEventFlusher.h"
#include <common/toolkit/SynchronizedCounter.h>
#include <common/toolkit/ListTk.h>
#include <common/net/message/fsck/FsckModificationEventMsg.h>
#include <program/Program.h>
#include <mutex>
/**
 * Sets up the flusher with the app's datagram listener and worker list and creates a
 * placeholder "fsck" node (empty nic list, ports 0) whose connection pool is configured with
 * the local nic list and its capabilities.
 */
ModificationEventFlusher::ModificationEventFlusher()
   : PThread("ModificationEventFlusher"),
     log("ModificationEventFlusher"),
     dGramLis(Program::getApp()->getDatagramListener() ),
     workerList(Program::getApp()->getWorkers() ),
     fsckMissedEvent(false)
{
   // the fsck node starts out without any interfaces
   NicAddressList emptyNicList;
   fsckNode = std::make_shared<Node>(NODETYPE_Invalid, "fsck", NumNodeID(), 0, 0, emptyNicList);

   // hand the local interfaces (and their capabilities) to the node's connection pool
   NicListCapabilities localCaps;
   NicAddressList localNics = Program::getApp()->getLocalNicList();
   NetworkInterfaceCard::supportedCapabilities(&localNics, &localCaps);

   fsckNode->getConnPool()->setLocalNicList(localNics, localCaps);
}
void ModificationEventFlusher::run()
{
try
{
registerSignalHandler();
while ( !this->getSelfTerminate() )
{
while ( this->eventTypeBufferList.empty() )
{
{
const std::lock_guard<Mutex> lock(eventsAddedMutex);
this->eventsAddedCond.timedwait(&eventsAddedMutex, 2000);
}
if ( this->getSelfTerminate() )
goto stop_component;
}
// buffer list not empty... go ahead and send it
this->sendToFsck();
}
stop_component:
log.log(Log_DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
}
/**
 * Appends one modification event to the buffer and wakes up the flusher thread.
 *
 * Blocks while the buffer holds MODFLUSHER_MAXSIZE_EVENTLIST or more entries; the size is
 * re-checked after each wait on the "events flushed" condition (at most 5s per wait).
 *
 * @param eventType type of the event; stored as uint8_t
 * @param entryID ID of the affected entry
 * @return always true
 */
bool ModificationEventFlusher::add(ModificationEventType eventType, const std::string& entryID)
{
   const auto bufferHasRoom = [this]
   {
      const std::lock_guard<Mutex> lock(mutex);
      return eventTypeBufferList.size() < MODFLUSHER_MAXSIZE_EVENTLIST;
   };

   // queue too long => wait if something is flushed
   while (!bufferHasRoom() )
   {
      const std::lock_guard<Mutex> lock(eventsFlushedMutex);
      eventsFlushedCond.timedwait(&eventsFlushedMutex, 5000);
   }

   // both lists are always extended together, so they stay index-aligned
   {
      const std::lock_guard<Mutex> lock(mutex);
      eventTypeBufferList.push_back(static_cast<uint8_t>(eventType) );
      entryIDBufferList.push_back(entryID);
   }

   // notify the flusher thread
   {
      const std::lock_guard<Mutex> lock(eventsAddedMutex);
      eventsAddedCond.broadcast();
   }

   return true;
}
void ModificationEventFlusher::sendToFsck()
{
if (!fsckNode)
{
log.logErr("Fsck modification events are present, but fsck node is not set.");
this->fsckMissedEvent = true;
// stop logging
this->disableLoggingLocally(false);
return;
}
// get the first MODFLUSHER_SEND_AT_ONCE entries from each list and send them to fsck
// only have the mutex on the lists as long as we really need it
UInt8List eventTypeListCopy;
StringList entryIDListCopy;
{
const std::lock_guard<Mutex> lock(mutex);
UInt8ListIter eventTypeStart = this->eventTypeBufferList.begin();
UInt8ListIter eventTypeEnd = this->eventTypeBufferList.begin();
ListTk::advance(eventTypeBufferList, eventTypeEnd, MODFLUSHER_SEND_AT_ONCE);
StringListIter entryIDStart = this->entryIDBufferList.begin();
StringListIter entryIDEnd = this->entryIDBufferList.begin();
ListTk::advance(entryIDBufferList, entryIDEnd, MODFLUSHER_SEND_AT_ONCE);
eventTypeListCopy.splice(eventTypeListCopy.begin(), this->eventTypeBufferList, eventTypeStart,
eventTypeEnd);
entryIDListCopy.splice(entryIDListCopy.begin(), this->entryIDBufferList, entryIDStart,
entryIDEnd);
}
FsckModificationEventMsg fsckModificationEventMsg(&eventTypeListCopy, &entryIDListCopy,
this->fsckMissedEvent);
bool ackReceived = this->dGramLis->sendToNodeUDPwithAck(fsckNode, &fsckModificationEventMsg,
MODFLUSHER_WAIT_FOR_ACK_MS, MODFLUSHER_WAIT_FOR_ACK_RETRIES);
if (!ackReceived)
{
log.log(Log_CRITICAL,
"Did not receive an ack from fsck for a FsckModificationEventMsg");
this->fsckMissedEvent = true;
// stop logging
this->disableLoggingLocally(false);
}
const std::lock_guard<Mutex> lock(eventsFlushedMutex);
eventsFlushedCond.broadcast();
}

View File

@@ -0,0 +1,184 @@
#pragma once
#include <app/App.h>
#include <common/components/worker/Worker.h>
#include <common/components/worker/DecAtomicWork.h>
#include <common/components/worker/IncAtomicWork.h>
#include <common/components/worker/IncSyncedCounterWork.h>
#include <common/threading/Condition.h>
#include <common/threading/Barrier.h>
#include <common/toolkit/MetadataTk.h>
#include <components/worker/BarrierWork.h>
#include <components/DatagramListener.h>
#include <program/Program.h>
#include <mutex>
#define MODFLUSHER_MAXSIZE_EVENTLIST 10000
#define MODFLUSHER_SEND_AT_ONCE 10 // only very few events, because msg is UDP
#define MODFLUSHER_FLUSH_MAX_INTERVAL_MS 5000
#define MODFLUSHER_WAIT_FOR_ACK_MS 1000
#define MODFLUSHER_WAIT_FOR_ACK_RETRIES 100
/*
* Note: this class is only used by fsck at the moment; therefore it is designed for fsck
*/
/**
 * Thread component that buffers modification events (event type + entry ID) and sends them to
 * an fsck node in batches.
 */
class ModificationEventFlusher: public PThread
{
public:
ModificationEventFlusher();
virtual void run();
bool add(ModificationEventType eventType, const std::string& entryID);
private:
LogContext log;
DatagramListener* dGramLis;
std::list<Worker*>* workerList;
// buffered events; both lists are filled and drained together
UInt8List eventTypeBufferList;
StringList entryIDBufferList;
// general mutex used to lock the buffer and the notification enabling and disabling
Mutex mutex;
// signalled after a send attempt or a flush (see sendToFsck() and stallAllWorkers())
Mutex eventsFlushedMutex;
Condition eventsFlushedCond;
// signalled by add() after an event was appended
Mutex eventsAddedMutex;
Condition eventsAddedCond;
AtomicSizeT loggingEnabled; // 1 if enabled
Mutex fsckMutex;
NodeHandle fsckNode;
bool fsckMissedEvent;
void sendToFsck();
public:
// inliners
/**
* Clears the buffer, enables logging and updates the fsck node parameters.
*
* @param fsckPortUDP UDP port that is set on the fsck node
* @param fsckNicList interfaces that are set on the fsck node
* @param forceRestart true to (re-)enable even if logging is already enabled
* @returns true if logging was enabled, false if it was already running
*/
bool enableLogging(unsigned fsckPortUDP, NicAddressList& fsckNicList, bool forceRestart)
{
std::unique_lock<Mutex> lock(mutex);
if (!forceRestart && loggingEnabled.read() > 0)
return false;
eventTypeBufferList.clear();
entryIDBufferList.clear();
this->loggingEnabled.set(1);
// set fsckParameters
setFsckParametersUnlocked(fsckPortUDP, fsckNicList);
// release the mutex before stalling the workers
lock.unlock();
// make sure all workers have noticed the changed loggingEnabled flag
stallAllWorkers(true, false);
return true;
}
/**
* Disables logging; calls disableLoggingLocally() with fromWorker set to true.
*
* @return true if the buffer is empty, false otherwise
*/
bool disableLogging()
{
return disableLoggingLocally(true);
}
bool isLoggingEnabled()
{
return (this->loggingEnabled.read() != 0);
}
bool getFsckMissedEvent()
{
const std::lock_guard<Mutex> lock(fsckMutex);
return this->fsckMissedEvent;
}
private:
/*
* Note: if logging is already disabled, this function basically does nothing, but returns
* if the buffer is empty or not
* @param fromWorker set to true if this is called from a worker thread. Otherwise, the worker
* calling this will deadlock
* @return true if buffer is empty, false otherwise
*/
bool disableLoggingLocally(bool fromWorker)
{
loggingEnabled.setZero();
stallAllWorkers(fromWorker, true);
std::lock_guard<Mutex> lock(mutex);
// make sure list is empty and no worker is logging anymore
return this->eventTypeBufferList.empty();
}
/**
* Resets the "missed event" flag and updates port and interfaces of the fsck node.
* Does no locking itself (enableLogging() calls it while holding the general mutex).
*/
void setFsckParametersUnlocked(unsigned portUDP, NicAddressList& nicList)
{
this->fsckMissedEvent = false;
this->fsckNode->updateInterfaces(portUDP, 0, nicList);
}
/**
* @param fromWorker This is called from a worker thread. In that case, this function blocks
* only until n-1 workers have reached the counter work item - because one
* of the workers is already blocked inside this function.
* @param flush Flush the modification event queue. Do this when stopping the modification
* event logger, because otherwise, workers might lock up trying to enqueue items
* which will never be sent to the Fsck.
*/
void stallAllWorkers(bool fromWorker, bool flush)
{
App* app = Program::getApp();
MultiWorkQueue* workQueue = app->getWorkQueue();
pthread_t threadID = PThread::getCurrentThreadID();
SynchronizedCounter notified;
// give every worker a personal work item that increments the shared counter
for (auto workerIt = workerList->begin(); workerIt != workerList->end(); ++workerIt)
{
// don't enqueue it in the worker that processes this message (this would deadlock)
if (!PThread::threadIDEquals((*workerIt)->getID(), threadID) || !fromWorker)
{
PersonalWorkQueue* personalQ = (*workerIt)->getPersonalWorkQueue();
workQueue->addPersonalWork(new IncSyncedCounterWork(&notified), personalQ);
}
}
// wait until the counter reaches the expected number of workers; the wait is done in slices
// (timeout argument 1000) so that the buffer can be flushed in between if requested
while (true)
{
const bool done = notified.timedWaitForCount(workerList->size() - (fromWorker ? 1 : 0),
1000);
if (done)
{
break;
}
else if (flush)
{
// drop all buffered events...
{
const std::lock_guard<Mutex> lock(mutex);
this->eventTypeBufferList.clear();
this->entryIDBufferList.clear();
}
// ...and wake up workers that wait in add() for free space in the buffer
{
const std::lock_guard<Mutex> lock(eventsFlushedMutex);
eventsFlushedCond.broadcast();
}
}
}
}
};

View File

@@ -0,0 +1,530 @@
#include <program/Program.h>
#include <common/components/worker/IncSyncedCounterWork.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
#include <common/threading/Barrier.h>
#include <common/toolkit/DebugVariable.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <app/App.h>
#include <components/buddyresyncer/BuddyResyncerBulkSyncSlave.h>
#include <components/buddyresyncer/BuddyResyncerModSyncSlave.h>
#include <components/worker/BarrierWork.h>
#include <toolkit/BuddyCommTk.h>
#include "BuddyResyncJob.h"
/**
 * Set up a resync job in state NOTSTARTED and create (but do not start) all of its slaves:
 * one gather slave, at least one bulk sync slave (count taken from the config, minimum 1),
 * the session store resyncer and one mod sync slave.
 */
BuddyResyncJob::BuddyResyncJob() :
   PThread("BuddyResyncJob"),
   state(BuddyResyncJobState_NOTSTARTED),
   startTime(0), endTime(0),
   gatherSlave(boost::make_unique<BuddyResyncerGatherSlave>(&syncCandidates))
{
   App* const app = Program::getApp();
   Config* const cfg = app->getConfig();

   // the buddy is looked up once, from the local node's numeric ID
   auto* const buddyGroups = app->getMetaBuddyGroupMapper();
   buddyNodeID = NumNodeID(buddyGroups->getBuddyTargetID(app->getLocalNodeNumID().val()));

   // never run with fewer than one bulk sync slave, whatever the config says
   const unsigned slaveCount = std::max<unsigned>(cfg->getTuneNumResyncSlaves(), 1);

   for (size_t slaveIdx = 0; slaveIdx < slaveCount; ++slaveIdx)
   {
      bulkSyncSlaves.emplace_back(
         boost::make_unique<BuddyResyncerBulkSyncSlave>(*this, &syncCandidates, slaveIdx,
            buddyNodeID));
   }

   sessionStoreResyncer = boost::make_unique<SessionStoreResyncer>(buddyNodeID);

   modSyncSlave =
      boost::make_unique<BuddyResyncerModSyncSlave>(*this, &syncCandidates, 1, buddyNodeID);
}
// Defaulted out-of-line: BuddyResyncJob.h only forward-declares BuddyResyncerBulkSyncSlave and
// BuddyResyncerModSyncSlave, while this file includes their full definitions, which destroying
// the std::unique_ptr members requires.
BuddyResyncJob::~BuddyResyncJob() = default;
/**
 * Thread body of one metadata buddy resync.
 *
 * Sequence: resolve the buddy node, park all workers on a barrier, announce the resync to the
 * buddy, start the gather / bulk sync / mod sync slaves, release the workers, wait for the
 * gather and bulk slaves, park the workers again (only if the job is still RUNNING), finish the
 * mod sync slave, resync the session store, and finally report the resulting consistency state
 * to the buddy and to the management node.
 *
 * Every early failure jumps to the cleanup label. Paths that fail while the workers are parked
 * on the barrier release them (workerBarrier.wait()) before jumping.
 */
void BuddyResyncJob::run()
{
   const char* logContext = "Run resync job";

   InternodeSyncer* internodeSyncer = Program::getApp()->getInternodeSyncer();
   App* app = Program::getApp();
   WorkerList* workers = app->getWorkers();
   NodeStore* metaNodes = app->getMetaNodes();
   const std::string metaPath = app->getMetaPath();

   // one slot per worker plus one for this thread
   Barrier workerBarrier(workers->size() + 1);
   bool workersStopped = false;

   startTime = time(NULL);

   syncCandidates.clear();

   auto buddyNode = metaNodes->referenceNode(buddyNodeID);

   if (!buddyNode)
   {
      LOG(MIRRORING, ERR, "Unable to resolve buddy node. Resync will not start.");
      setState(BuddyResyncJobState_FAILURE);
      goto cleanup;
   }

   DEBUG_ENV_VAR(unsigned, DIE_AT_RESYNC_N, 0, "BEEGFS_RESYNC_DIE_AT_N");
   if (DIE_AT_RESYNC_N) {
      static unsigned resyncs = 0;
      // for #479: terminating a server at this point caused the workers to terminate before the
      // resyncer had communicated with them, causing a deadlock on shutdown
      if (++resyncs == DIE_AT_RESYNC_N) {
         ::kill(0, SIGTERM);
         sleep(4);
      }
   }

   stopAllWorkersOn(workerBarrier);
   {
      // Notify buddy that resync started and wait for confirmation
      StorageResyncStartedMsg msg(buddyNodeID.val());
      const auto respMsg = MessagingTk::requestResponse(*buddyNode, msg,
            NETMSGTYPE_StorageResyncStartedResp);

      if (!respMsg)
      {
         LogContext(logContext).logErr("Unable to notify buddy about resync attempt. "
               "Resync will not start.");
         setState(BuddyResyncJobState_FAILURE);
         workerBarrier.wait();
         goto cleanup;
      }

      // resync could have been aborted before we got here. if so, exit as soon as possible without
      // setting the resync job state to something else.
      {
         std::unique_lock<Mutex> lock(stateMutex);

         if (state == BuddyResyncJobState_INTERRUPTED)
         {
            lock.unlock();
            workerBarrier.wait();
            goto cleanup;
         }

         state = BuddyResyncJobState_RUNNING;
      }

      internodeSyncer->setResyncInProgress(true);

      const bool startGatherSlaveRes = startGatherSlaves();
      if (!startGatherSlaveRes)
      {
         setState(BuddyResyncJobState_FAILURE);
         workerBarrier.wait();
         goto cleanup;
      }

      const bool startResyncSlaveRes = startSyncSlaves();
      if (!startResyncSlaveRes)
      {
         setState(BuddyResyncJobState_FAILURE);
         workerBarrier.wait();
         goto cleanup;
      }
   }
   // release the workers again; the bulk phase runs while they operate normally
   workerBarrier.wait();

   LOG_DEBUG(__func__, Log_DEBUG, "Going to join gather slaves.");
   joinGatherSlaves();
   LOG_DEBUG(__func__, Log_DEBUG, "Joined gather slaves.");

   LOG_DEBUG(__func__, Log_DEBUG, "Going to join sync slaves.");

   // gather slaves have finished. Tell sync slaves to stop when work packages are empty and wait.
   for (auto it = bulkSyncSlaves.begin(); it != bulkSyncSlaves.end(); ++it)
   {
      (*it)->setOnlyTerminateIfIdle(true);
      (*it)->selfTerminate();
   }

   for (auto it = bulkSyncSlaves.begin(); it != bulkSyncSlaves.end(); ++it)
      (*it)->join();

   // here we can be in one of two situations:
   //  1. bulk resync has succeeded. we then totally stop the workers: the session store must be in
   //     a quiescent state for resync, so for simplicity, we suspend all client operations here.
   //     we do not want to do this any earlier than this point, because bulk syncers may take a
   //     very long time to complete.
   //  2. bulk resync has failed. in this case, the bulk syncers have aborted the currently running
   //     job, and the mod syncer is either dead or in the process of dying. here we MUST NOT stop
   //     the workers, because they are very likely blocked on the mod sync queue already and will
   //     not unblock before the queue is cleared.
   if (getState() == BuddyResyncJobState_RUNNING)
   {
      stopAllWorkersOn(workerBarrier);
      workersStopped = true;
   }

   modSyncSlave->setOnlyTerminateIfIdle(true);
   modSyncSlave->selfTerminate();
   modSyncSlave->join();

   // gatherers are done and the workers have been stopped, we can safely resync the session now.

   LOG_DEBUG(__func__, Log_DEBUG, "Joined sync slaves.");

   // Perform session store resync
   // the job may have been aborted or terminated by errors. in this case, do not resync the session
   // store. end the sync as quickly as possible.
   if (getState() == BuddyResyncJobState_RUNNING)
      sessionStoreResyncer->doSync();

   // session store is now synced, and future actions can be forwarded safely. we do not restart
   // the workers here because the resync may still enter FAILED state, and we don't want to forward
   // to the secondary in this case.

cleanup:
   bool syncErrors = false;

   // wait for every slave to report "not running" and collect its error counters. each wait
   // happens with the slave's stateMutex held, as the condition wait requires.
   {
      std::lock_guard<Mutex> lock(gatherSlave->stateMutex);
      while (gatherSlave->isRunning)
         gatherSlave->isRunningChangeCond.wait(&gatherSlave->stateMutex);

      syncErrors |= gatherSlave->getStats().errors != 0;
   }

   for (auto it = bulkSyncSlaves.begin(); it != bulkSyncSlaves.end(); ++it)
   {
      BuddyResyncerBulkSyncSlave* slave = it->get();
      std::lock_guard<Mutex> lock(slave->stateMutex);
      while (slave->isRunning)
         slave->isRunningChangeCond.wait(&slave->stateMutex);

      syncErrors |= slave->getStats().dirErrors != 0;
      syncErrors |= slave->getStats().fileErrors != 0;
   }

   syncErrors |= sessionStoreResyncer->getStats().errors;

   {
      // the mutex must be held while checking isRunning and while waiting on the condition,
      // same as for the gather and bulk sync slaves above.
      std::lock_guard<Mutex> lock(modSyncSlave->stateMutex);
      while (modSyncSlave->isRunning)
         modSyncSlave->isRunningChangeCond.wait(&modSyncSlave->stateMutex);

      syncErrors |= modSyncSlave->getStats().errors != 0;
   }

   if (getState() == BuddyResyncJobState_RUNNING || getState() == BuddyResyncJobState_INTERRUPTED)
   {
      if (syncErrors)
         setState(BuddyResyncJobState_ERRORS);
      else if (getState() == BuddyResyncJobState_RUNNING)
         setState(BuddyResyncJobState_SUCCESS);

      // delete timestamp override file if it exists.
      BuddyCommTk::setBuddyNeedsResync(metaPath, false);

      const TargetConsistencyState buddyState = newBuddyState();
      informBuddy(buddyState);
      informMgmtd(buddyState);

      const bool interrupted = getState() != BuddyResyncJobState_SUCCESS;
      LOG(MIRRORING, WARNING, "Resync finished.", interrupted, syncErrors);
   }

   internodeSyncer->setResyncInProgress(false);
   endTime = time(NULL);

   // restart all the worker threads
   if (workersStopped)
      workerBarrier.wait();

   // if the resync was aborted, the mod sync queue may still contain items. additionally, workers
   // may be waiting for a changeset slot, or they may have started executing after the resync was
   // aborted by the sync slaves, but before the resync was officially set to "not running".
   // we cannot set the resync to "not running" in abort() because we have no upper bound for the
   // number of worker threads. even if we did set the resync to "not running" in abort() and
   // cleared the sync queues at the same time, there may still be an arbitrary number of threads
   // waiting for a changeset slot.
   // instead, we have to wait for each thread to "see" that the resync is over, and periodically
   // clear the sync queue to unblock those workers that are still waiting for slots.
   if (syncErrors)
   {
      SynchronizedCounter counter;

      for (auto it = workers->begin(); it != workers->end(); ++it)
      {
         auto& worker = **it;

         worker.getWorkQueue()->addPersonalWork(
               new IncSyncedCounterWork(&counter),
               worker.getPersonalWorkQueue());
      }

      while (!counter.timedWaitForCount(workers->size(), 100))
      {
         while (!syncCandidates.isFilesEmpty())
         {
            MetaSyncCandidateFile candidate;
            syncCandidates.fetch(candidate, this);
            candidate.signal();
         }
      }
   }
}
/**
 * Queue a BarrierWork for the given barrier on every worker's personal queue, then wait on the
 * barrier from this thread as well.
 *
 * @param barrier barrier the workers (and the caller) meet on; callers in this file size it as
 *        "number of workers + 1".
 */
void BuddyResyncJob::stopAllWorkersOn(Barrier& barrier)
{
   WorkerList* workers = Program::getApp()->getWorkers();

   for (Worker* worker : *workers)
   {
      PersonalWorkQueue* const ownQueue = worker->getPersonalWorkQueue();
      MultiWorkQueue* const sharedQueue = worker->getWorkQueue();

      sharedQueue->addPersonalWork(new BarrierWork(&barrier), ownQueue);
   }

   // Wait until all workers are blocked.
   barrier.wait();
}
void BuddyResyncJob::abort(bool wait_for_completion)
{
setState(BuddyResyncJobState_INTERRUPTED);
gatherSlave->selfTerminate();
// set onlyTerminateIfIdle on the slaves to false - they will be stopped by the main loop then.
for (auto it = bulkSyncSlaves.begin(); it != bulkSyncSlaves.end(); ++it)
{
BuddyResyncerBulkSyncSlave* slave = it->get();
slave->setOnlyTerminateIfIdle(false);
}
modSyncSlave->selfTerminate();
int retry = 600;
/* Wait till all on-going thread events are fetched or max 30mins.
* (fetch waits for 3secs if there are no files to be fetched)
*/
if (wait_for_completion)
{
modSyncSlave->join();
while (threadCount > 0 && retry)
{
LOG(MIRRORING, WARNING, "Wait for pending worker threads to finish");
if (!syncCandidates.isFilesEmpty())
{
MetaSyncCandidateFile candidate;
syncCandidates.fetch(candidate, this);
candidate.signal();
}
retry--;
}
if (threadCount)
LOG(MIRRORING, ERR, "Cleanup of aborted resync failed: I/O worker threads"
" did not finish properly: ",
("threadCount", threadCount.load()));
}
}
/**
 * Start the gather slave thread.
 *
 * NOTE(review): isRunning is set to true here *after* start(); the gather thread's own run()
 * also sets it to true on entry and to false on exit. If the thread could finish before the
 * call below, the flag would stay set - confirm this ordering is safe.
 *
 * @return true if the thread was started, false if thread creation threw
 *         PThreadCreateException (the error is logged).
 */
bool BuddyResyncJob::startGatherSlaves()
{
   try
   {
      gatherSlave->resetSelfTerminate();
      gatherSlave->start();
      gatherSlave->setIsRunning(true);

      return true;
   }
   catch (PThreadCreateException& e)
   {
      const std::string errMsg = std::string("Unable to start thread: ") + e.what();
      LogContext(__func__).logErr(errMsg);
   }

   return false;
}
/**
 * Start all bulk sync slaves, then the mod sync slave.
 *
 * If creating any thread fails, the error is logged, the bulk sync slaves started so far are
 * told to self-terminate, and false is returned.
 *
 * (The former unused locals - including a `buddyNodeID` that shadowed the member and cost an
 * extra buddy group mapper lookup - were dropped; nothing in this function read them.)
 *
 * @return true if every slave thread was started, false otherwise
 */
bool BuddyResyncJob::startSyncSlaves()
{
   for (size_t i = 0; i < bulkSyncSlaves.size(); i++)
   {
      try
      {
         bulkSyncSlaves[i]->resetSelfTerminate();
         bulkSyncSlaves[i]->start();
         bulkSyncSlaves[i]->setIsRunning(true);
      }
      catch (PThreadCreateException& e)
      {
         LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what() );

         // roll back: stop the slaves that were already started (indices below i)
         for (size_t j = 0; j < i; j++)
            bulkSyncSlaves[j]->selfTerminate();

         return false;
      }
   }

   try
   {
      modSyncSlave->resetSelfTerminate();
      modSyncSlave->start();
      modSyncSlave->setIsRunning(true);
   }
   catch (PThreadCreateException& e)
   {
      LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what() );

      // roll back: all bulk sync slaves are running at this point
      for (size_t j = 0; j < bulkSyncSlaves.size(); j++)
         bulkSyncSlaves[j]->selfTerminate();

      return false;
   }

   return true;
}
// Block until the (single) gather slave thread has been joined.
void BuddyResyncJob::joinGatherSlaves()
{
gatherSlave->join();
}
/**
 * Collect a statistics snapshot of this job: job state and start/end time, gather slave
 * counters, the sums over all bulk sync slaves, session store resync counters and mod sync
 * counters. stateMutex is held for the whole call.
 */
MetaBuddyResyncJobStatistics BuddyResyncJob::getJobStats()
{
   std::lock_guard<Mutex> lock(stateMutex);

   // gather slave
   const BuddyResyncerGatherSlave::Stats gatherStats = gatherSlave->getStats();
   const uint64_t dirsDiscovered = gatherStats.dirsDiscovered;
   const uint64_t gatherErrors = gatherStats.errors;

   // bulk sync slaves: add up the per-slave counters
   uint64_t dirsSynced = 0;
   uint64_t filesSynced = 0;
   uint64_t dirErrors = 0;
   uint64_t fileErrors = 0;

   for (const auto& bulkSlave : bulkSyncSlaves)
   {
      const BuddyResyncerBulkSyncSlave::Stats slaveStats = bulkSlave->getStats();

      dirsSynced += slaveStats.dirsSynced;
      filesSynced += slaveStats.filesSynced;
      dirErrors += slaveStats.dirErrors;
      fileErrors += slaveStats.fileErrors;
   }

   // session store resyncer
   const SessionStoreResyncer::Stats sessionSyncStats = sessionStoreResyncer->getStats();
   const uint64_t sessionsToSync = sessionSyncStats.sessionsToSync;
   const uint64_t sessionsSynced = sessionSyncStats.sessionsSynced;
   const bool sessionSyncErrors = sessionSyncStats.errors;

   // mod sync slave
   const BuddyResyncerModSyncSlave::Stats modSyncStats = modSyncSlave->getStats();
   const uint64_t modObjectsSynced = modSyncStats.objectsSynced;
   const uint64_t modSyncErrors = modSyncStats.errors;

   return MetaBuddyResyncJobStatistics(
      state, startTime, endTime,
      dirsDiscovered, gatherErrors,
      dirsSynced, filesSynced, dirErrors, fileErrors,
      sessionsToSync, sessionsSynced, sessionSyncErrors,
      modObjectsSynced, modSyncErrors);
}
/**
 * Determine the state for the buddy after the end of a resync job.
 *
 * @returns the new state to be set on the buddy according to this job's JobState: GOOD only
 *          for SUCCESS, BAD for ERRORS / INTERRUPTED / FAILURE and for any other state (which
 *          is additionally logged as an error).
 */
TargetConsistencyState BuddyResyncJob::newBuddyState()
{
   // take one snapshot through getState() (which locks stateMutex) and use it for both the
   // decision and the log message, instead of re-reading the member without the lock.
   const BuddyResyncJobState jobState = getState();

   switch (jobState)
   {
      case BuddyResyncJobState_ERRORS:
      case BuddyResyncJobState_INTERRUPTED:
      case BuddyResyncJobState_FAILURE:
         return TargetConsistencyState_BAD;

      case BuddyResyncJobState_SUCCESS:
         return TargetConsistencyState_GOOD;

      default:
         LOG(MIRRORING, ERR, "Undefined resync state.", ("state", jobState));
         return TargetConsistencyState_BAD;
   }
}
void BuddyResyncJob::informBuddy(const TargetConsistencyState newTargetState)
{
App* app = Program::getApp();
NodeStore* metaNodes = app->getMetaNodes();
MirrorBuddyGroupMapper* buddyGroups = app->getMetaBuddyGroupMapper();
NumNodeID buddyNodeID =
NumNodeID(buddyGroups->getBuddyTargetID(app->getLocalNodeNumID().val()));
auto metaNode = metaNodes->referenceNode(buddyNodeID);
if (!metaNode)
{
LOG(MIRRORING, ERR, "Unable to inform buddy about finished resync", buddyNodeID.str());
return;
}
UInt16List nodeIDs(1, buddyNodeID.val());
UInt8List states(1, newTargetState);
SetTargetConsistencyStatesMsg msg(NODETYPE_Meta, &nodeIDs, &states, false);
const auto respMsg = MessagingTk::requestResponse(*metaNode, msg,
NETMSGTYPE_SetTargetConsistencyStatesResp);
if (!respMsg)
{
LogContext(__func__).logErr(
"Unable to inform buddy about finished resync. "
"BuddyNodeID: " + buddyNodeID.str() + "; "
"error: Communication Error");
return;
}
{
auto* respMsgCast = static_cast<SetTargetConsistencyStatesRespMsg*>(respMsg.get());
FhgfsOpsErr result = respMsgCast->getResult();
if (result != FhgfsOpsErr_SUCCESS)
{
LogContext(__func__).logErr(
"Error while informing buddy about finished resync. "
"BuddyNodeID: " + buddyNodeID.str() + "; "
"error: " + boost::lexical_cast<std::string>(result) );
}
}
}
void BuddyResyncJob::informMgmtd(const TargetConsistencyState newTargetState)
{
App* app = Program::getApp();
NodeStore* mgmtNodes = app->getMgmtNodes();
auto mgmtNode = mgmtNodes->referenceFirstNode();
if (!mgmtNode)
{
LOG(MIRRORING, ERR, "Unable to communicate with management node.");
return;
}
UInt16List nodeIDs(1, buddyNodeID.val());
UInt8List states(1, newTargetState);
SetTargetConsistencyStatesMsg msg(NODETYPE_Meta, &nodeIDs, &states, false);
const auto respMsg = MessagingTk::requestResponse(*mgmtNode, msg,
NETMSGTYPE_SetTargetConsistencyStatesResp);
if (!respMsg)
{
LOG(MIRRORING, ERR,
"Unable to inform management node about finished resync: Communication error.");
return;
}
{
auto* respMsgCast = static_cast<SetTargetConsistencyStatesRespMsg*>(respMsg.get());
FhgfsOpsErr result = respMsgCast->getResult();
if (result != FhgfsOpsErr_SUCCESS)
LOG(MIRRORING, ERR, "Error informing management node about finished resync.", result);
}
}

View File

@@ -0,0 +1,92 @@
#pragma once
#include <components/buddyresyncer/BuddyResyncerGatherSlave.h>
#include <components/buddyresyncer/SessionStoreResyncer.h>
#include <common/storage/mirroring/BuddyResyncJobStatistics.h>
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/threading/PThread.h>
#include <common/nodes/NumNodeID.h>
#include <common/nodes/TargetStateInfo.h>
#include <atomic>
#include <mutex>
class BuddyResyncerBulkSyncSlave;
class BuddyResyncerModSyncSlave;
/**
 * One metadata buddy resync run, executed in its own thread (PThread). The job owns the sync
 * candidate store and all slave objects working on it (gather slave, bulk sync slaves, mod sync
 * slave, session store resyncer).
 */
class BuddyResyncJob : public PThread
{
public:
BuddyResyncJob();
~BuddyResyncJob();
// thread entry point
virtual void run();
// interrupt the job; with wait_for_completion the call also waits for threadCount to drain
void abort(bool wait_for_completion);
MetaBuddyResyncJobStatistics getJobStats();
// changed through registerOps() / unregisterOps(); abort(true) waits for it to reach zero
std::atomic<unsigned int> threadCount{ 0 };
private:
// current job state; accessed under stateMutex by getState() / isRunning() / setState()
BuddyResyncJobState state;
Mutex stateMutex;
// set from time() at the beginning / end of run()
int64_t startTime;
int64_t endTime;
// node ID of the buddy, resolved in the constructor
NumNodeID buddyNodeID;
// candidate queue shared with the slaves; declared before the slaves that get a pointer to it
MetaSyncCandidateStore syncCandidates;
std::unique_ptr<BuddyResyncerGatherSlave> gatherSlave;
std::vector<std::unique_ptr<BuddyResyncerBulkSyncSlave>> bulkSyncSlaves;
std::unique_ptr<BuddyResyncerModSyncSlave> modSyncSlave;
std::unique_ptr<SessionStoreResyncer> sessionStoreResyncer;
bool startGatherSlaves();
bool startSyncSlaves();
void joinGatherSlaves();
public:
// @return the current job state, read under stateMutex
BuddyResyncJobState getState()
{
std::lock_guard<Mutex> lock(stateMutex);
return state;
}
// @return true only while the state is BuddyResyncJobState_RUNNING
bool isRunning()
{
std::lock_guard<Mutex> lock(stateMutex);
return state == BuddyResyncJobState_RUNNING;
}
// hand a file sync candidate over to the candidate store (moved in)
void enqueue(MetaSyncCandidateFile syncCandidate, PThread* caller)
{
syncCandidates.add(std::move(syncCandidate), caller);
}
// increment threadCount
void registerOps()
{
this->threadCount += 1;
}
// decrement threadCount
void unregisterOps()
{
this->threadCount -= 1;
}
private:
// set the job state under stateMutex; the debug log line is emitted before the lock is taken
void setState(const BuddyResyncJobState state)
{
LOG_DEBUG(__func__, Log_DEBUG, "Setting state: "
+ StringTk::uintToStr(static_cast<int>(state) ) );
std::lock_guard<Mutex> lock(stateMutex);
this->state = state;
}
TargetConsistencyState newBuddyState();
void informBuddy(const TargetConsistencyState newTargetState);
void informMgmtd(const TargetConsistencyState newTargetState);
void stopAllWorkersOn(Barrier& barrier);
};

View File

@@ -0,0 +1,89 @@
#include "BuddyResyncer.h"
#include <program/Program.h>
// Per-thread (__thread) pointer to the change set currently being assembled by this thread;
// null while no change set is registered.
__thread MetaSyncCandidateFile* BuddyResyncer::currentThreadChangeSet = 0;
/**
 * If a job still exists: abort it (without waiting for worker ops), join its thread and
 * delete it.
 */
BuddyResyncer::~BuddyResyncer()
{
   if (!job)
      return;

   job->abort(false);
   job->join();

   SAFE_DELETE(job);
}
/**
 * Start a new resync job, unless one is still active or new resyncs have been disabled.
 *
 * @return FhgfsOpsErr_INTERRUPTED if noNewResyncs is set,
 *         FhgfsOpsErr_INUSE if the current job is NOTSTARTED/RUNNING or could not be joined
 *         within the timedjoin(10) limit,
 *         FhgfsOpsErr_SUCCESS if a fresh job was created and started.
 */
FhgfsOpsErr BuddyResyncer::startResync()
{
   std::lock_guard<Mutex> lock(jobMutex);

   if (noNewResyncs)
      return FhgfsOpsErr_INTERRUPTED;

   if (job)
   {
      const BuddyResyncJobState jobState = job->getState();

      if (jobState == BuddyResyncJobState_NOTSTARTED || jobState == BuddyResyncJobState_RUNNING)
         return FhgfsOpsErr_INUSE;

      // a job must never be started more than once. to ensure this, we create a new job for
      // every resync process, because doing so allows us to use NOTSTARTED and RUNNING as
      // "job is currently active" values. otherwise, a second resync may see state SUCCESS and
      // allow duplicate resyncer activity.
      // if a job is still active, don't wait for very long - it may take a while to finish. the
      // internode syncer will retry periodically, so this will work fine.
      if (!job->timedjoin(10))
         return FhgfsOpsErr_INUSE;

      delete job;
   }

   // either there was no job yet, or the finished one has just been disposed of
   job = new BuddyResyncJob();
   job->start();

   return FhgfsOpsErr_SUCCESS;
}
void BuddyResyncer::shutdown()
{
std::unique_ptr<BuddyResyncJob> job;
{
std::lock_guard<Mutex> lock(jobMutex);
job.reset(this->job);
this->job = nullptr;
noNewResyncs = true;
}
if (job)
{
job->abort(false);
job->join();
}
}
void BuddyResyncer::commitThreadChangeSet()
{
BEEGFS_BUG_ON(!currentThreadChangeSet, "no change set active");
auto* job = Program::getApp()->getBuddyResyncer()->getResyncJob();
std::unique_ptr<MetaSyncCandidateFile> candidate(currentThreadChangeSet);
currentThreadChangeSet = nullptr;
Barrier syncDone(2);
candidate->prepareSignal(syncDone);
job->enqueue(std::move(*candidate), PThread::getCurrentThread());
syncDone.wait();
}

View File

@@ -0,0 +1,69 @@
#pragma once
#include <components/buddyresyncer/BuddyResyncJob.h>
#include <common/storage/StorageErrors.h>
#include <common/Common.h>
#include <mutex>
/**
 * This component does not represent a thread by itself. Instead, it manages a group of "slave
 * threads" that are started and stopped when needed.
 *
 * Other components should only use this component as an interface and not access the slave threads
 * directly.
 */
class BuddyResyncer
{
public:
// starts without a job and with new resyncs allowed
BuddyResyncer()
: job(NULL), noNewResyncs(false)
{ }
~BuddyResyncer();
// create and start a resync job unless one is active or resyncs are disabled
FhgfsOpsErr startResync();
// disable new resyncs and abort/join the current job
void shutdown();
// enqueue the calling thread's change set at the current job and wait until it is signalled
static void commitThreadChangeSet();
private:
BuddyResyncJob* job; // Note: In the Storage Server, this is a Map. Here it's just one pointer
// that's set to NULL when no job is present.
// guards `job` (and is held while noNewResyncs is set in shutdown())
Mutex jobMutex;
public:
// @return the current job pointer (may be NULL), read under jobMutex. The lock is released on
// return, so the pointer itself is not protected afterwards.
BuddyResyncJob* getResyncJob()
{
std::lock_guard<Mutex> lock(jobMutex);
return job;
}
// allocate a fresh change set for the calling thread; it is a bug if one is already registered
static void registerSyncChangeset()
{
BEEGFS_BUG_ON(currentThreadChangeSet, "current changeset not nullptr");
currentThreadChangeSet = new MetaSyncCandidateFile;
}
// delete the calling thread's change set (if any) without committing it
static void abandonSyncChangeset()
{
delete currentThreadChangeSet;
currentThreadChangeSet = nullptr;
}
// @return the calling thread's change set, or null if none is registered
static MetaSyncCandidateFile* getSyncChangeset()
{
return currentThreadChangeSet;
}
private:
// per-thread change set pointer
static __thread MetaSyncCandidateFile* currentThreadChangeSet;
// once set (by shutdown()), startResync() refuses with FhgfsOpsErr_INTERRUPTED
bool noNewResyncs;
// No copy allowed
BuddyResyncer(const BuddyResyncer&);
BuddyResyncer& operator=(const BuddyResyncer&);
};

View File

@@ -0,0 +1,234 @@
#include "BuddyResyncerBulkSyncSlave.h"
#include <common/net/message/storage/mirroring/ResyncRawInodesRespMsg.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/MessagingTk.h>
#include <common/Common.h>
#include <net/message/storage/mirroring/ResyncRawInodesMsgEx.h>
#include <net/msghelpers/MsgHelperXAttr.h>
#include <program/Program.h>
#include <toolkit/XAttrTk.h>
#include <dirent.h>
// Construct a bulk sync slave. The thread name passed to SyncSlaveBase is
// "BuddyResyncerBulkSyncSlave_<slaveID>"; parentJob and buddyNodeID are forwarded to the base
// class, and the candidate store pointer is only stored here.
BuddyResyncerBulkSyncSlave::BuddyResyncerBulkSyncSlave(BuddyResyncJob& parentJob,
MetaSyncCandidateStore* syncCandidates, uint8_t slaveID, const NumNodeID& buddyNodeID) :
SyncSlaveBase("BuddyResyncerBulkSyncSlave_" + StringTk::uintToStr(slaveID), parentJob,
buddyNodeID),
syncCandidates(syncCandidates)
{
}
/**
 * Main loop of a bulk sync slave: fetch directory candidates from the store and resync each of
 * them to the buddy until told to terminate.
 *
 * Hash directories are resynced under a HashDirLock; content directories under a FileIDLock on
 * the owning directory inode, with their fsids directory synced first. Any resync failure
 * counts a directory error, aborts the parent job and ends the loop.
 */
void BuddyResyncerBulkSyncSlave::syncLoop()
{
   EntryLockStore* const lockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   while (!getSelfTerminateNotIdle())
   {
      MetaSyncCandidateDir candidate;
      syncCandidates->fetch(candidate, this);

      // the sync candidate we have retrieved may be invalid if this thread was ordered to
      // terminate and the sync candidate store has no more directories queued for us.
      // in this case, we may end the sync because we have no more candidates, and the resync job
      // guarantees that all gather threads have completed before the bulk syncers are ordered to
      // finish.
      if (syncCandidates->isDirsEmpty() && candidate.getRelativePath().empty() &&
            getSelfTerminate())
         return;

      const bool isHashDir = candidate.getType() == MetaSyncDirType::InodesHashDir ||
         candidate.getType() == MetaSyncDirType::DentriesHashDir;

      if (isHashDir)
      {
         // lock the hash path in accordance with MkLocalDir, RmLocalDir and RmDir.
         // the two hex hash components are taken from the text after the first and after the
         // second '/' of the relative path.
         const auto& hashDir = candidate.getRelativePath();
         const auto slash1 = hashDir.find('/');
         const auto slash2 = hashDir.find('/', slash1 + 1);
         const auto hash1 =
            StringTk::strHexToUInt(hashDir.substr(slash1 + 1, slash2 - slash1 - 1));
         const auto hash2 = StringTk::strHexToUInt(hashDir.substr(slash2 + 1));

         HashDirLock hashLock = {lockStore, {hash1, hash2}};

         if (resyncDirectory(candidate, "") != FhgfsOpsErr_SUCCESS)
         {
            numDirErrors.increase();
            parentJob->abort(false);
            return;
         }

         continue;
      }

      // not a hash dir, so it must be a content directory. sync the #fSiDs# first, then the actual
      // content directory. we lock the directory inode the content directory belongs to because we
      // must not allow a concurrent meta action to delete the content directory while we are
      // resyncing it. concurrent modification of directory contents could be allowed, though.
      const std::string dirInodeID = Path(candidate.getRelativePath()).back();
      const std::string fullPath = META_BUDDYMIRROR_SUBDIR_NAME "/" + candidate.getRelativePath();

      FileIDLock dirLock(lockStore, dirInodeID, false);

      // first ensure that the directory still exists - a concurrent modification may have deleted
      // it. this would not be an error; bulk resync should not touch it, a modification sync
      // would remove it completely.
      if (::access(fullPath.c_str(), F_OK) != 0 && errno == ENOENT)
      {
         numDirsSynced.increase(); // Count it anyway, so the sums match up.
         continue;
      }

      MetaSyncCandidateDir fsIDs(
         candidate.getRelativePath() + "/" + META_DIRENTRYID_SUB_STR,
         MetaSyncDirType::InodesHashDir);

      FhgfsOpsErr resyncRes = resyncDirectory(fsIDs, dirInodeID);
      if (resyncRes == FhgfsOpsErr_SUCCESS)
         resyncRes = resyncDirectory(candidate, dirInodeID);

      if (resyncRes == FhgfsOpsErr_SUCCESS)
      {
         numDirsSynced.increase();
         continue;
      }

      numDirErrors.increase();
      parentJob->abort(false);
      return;
   }
}
// Resync a single candidate directory.
// Packs this slave, the candidate and the inode ID into a StreamCandidateArgs tuple (a tuple of
// references) and passes it as context, together with the static streamCandidateDir trampoline,
// to resyncAt(). The trampoline unpacks the tuple and calls the member streamCandidateDir().
// resyncAt() is not defined in this file - presumably it is provided by SyncSlaveBase.
// NOTE(review): `args` lives on this stack frame, so this assumes resyncAt() only uses the
// context during the call - confirm.
FhgfsOpsErr BuddyResyncerBulkSyncSlave::resyncDirectory(const MetaSyncCandidateDir& root,
const std::string& inodeID)
{
StreamCandidateArgs args(*this, root, inodeID);
return resyncAt(Path(root.getRelativePath()), true, streamCandidateDir, &args);
}
// Stream the entries of one candidate directory over the given socket.
//
// The directory is opened below META_BUDDYMIRROR_SUBDIR_NAME and read entry by entry; "." and
// ".." are skipped. Entries of a content directory are streamed as dentries (files, under a
// ParentNameLock) or as inodes (sub-directories); entries of all other directory types are
// streamed as inodes under a FileIDLock. After the loop a final packet built from an empty
// tuple is sent and its result returned.
//
// Returns FhgfsOpsErr_INTERNAL if the directory cannot be opened, the first streaming error if
// one occurs, otherwise the result of the final sendResyncPacket(). Read/stat problems are
// counted in numDirErrors / numFileErrors but do not change the return value by themselves.
FhgfsOpsErr BuddyResyncerBulkSyncSlave::streamCandidateDir(Socket& socket,
const MetaSyncCandidateDir& candidate, const std::string& inodeID)
{
EntryLockStore* const lockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();
Path candidatePath(META_BUDDYMIRROR_SUBDIR_NAME "/" + candidate.getRelativePath());
// closed automatically through CloseDirDeleter when `dir` goes out of scope
std::unique_ptr<DIR, StorageTk::CloseDirDeleter> dir(opendir(candidatePath.str().c_str()));
if (!dir)
{
LOG(MIRRORING, ERR, "Could not open candidate directory.", candidatePath, sysErr);
return FhgfsOpsErr_INTERNAL;
}
// fd of the open directory, used for the *at() calls below
int dirFD = ::dirfd(dir.get());
if (dirFD < 0)
{
LOG(MIRRORING, ERR, "Could not open candidate directory.", candidatePath, sysErr);
return FhgfsOpsErr_INTERNAL;
}
while (true)
{
struct dirent* entry;
// NOTE(review): BuddyResyncerGatherSlave::crawlDir tests USE_READDIR_R for the same purpose;
// confirm whether USE_READDIR_P here is intentional.
#if USE_READDIR_P
struct dirent entryBuf;
int err = ::readdir_r(dir.get(), &entryBuf, &entry);
#else
// readdir() returns NULL both at end-of-directory and on error; errno tells them apart
errno = 0;
entry = readdir(dir.get());
int err = entry ? 0 : errno;
#endif
if (err > 0)
{
LOG(MIRRORING, ERR, "Could not read candidate directory.", candidatePath, sysErr);
numDirErrors.increase();
break;
}
// end of directory
if (!entry)
break;
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
continue;
struct stat statData;
if (::fstatat(dirFD, entry->d_name, &statData, AT_SYMLINK_NOFOLLOW) < 0)
{
// the file/directory may have gone away. this is not an error, and the secondary will
// delete the file/directory as well.
if (errno == ENOENT)
continue;
LOG(MIRRORING, ERR, "Could not stat resync candidate.",
candidatePath, entry->d_name, sysErr);
numFileErrors.increase();
continue;
}
// only regular files and directories are expected; anything else is counted and skipped
if (!S_ISDIR(statData.st_mode) && !S_ISREG(statData.st_mode))
{
LOG(MIRRORING, ERR, "Resync candidate is neither file nor directory.",
candidatePath, entry->d_name, statData.st_mode);
numFileErrors.increase();
continue;
}
if (candidate.getType() == MetaSyncDirType::ContentDir)
{
// if it's in a content directory and a directory, it can really only be the fsids dir.
// locking for this case is already sorted, so we only have to transfer the (empty)
// inode metadata to tell the secondary that the directory may stay.
if (S_ISDIR(statData.st_mode))
{
const FhgfsOpsErr streamRes = streamInode(socket, Path(entry->d_name), true);
if (streamRes != FhgfsOpsErr_SUCCESS)
return streamRes;
}
else
{
// dentry lock is held only for the duration of this else-block
ParentNameLock dentryLock(lockStore, inodeID, entry->d_name);
const auto streamRes = streamDentry(socket, Path(), entry->d_name);
if (streamRes != FhgfsOpsErr_SUCCESS)
{
numFileErrors.increase();
return streamRes;
}
else
{
numFilesSynced.increase();
}
}
continue;
}
// we are now either in a fsids (file inode) directory or a second-level inode hash-dir,
// which may contain either file or directory inodes. taking a lock unnecessarily is still
// cheaper than reading the inode from disk to determine its type, so just lock the inode id
// as file
FileIDLock dirLock(lockStore, entry->d_name, true);
// access the file once more, because it may have been deleted in the meantime. a new entry
// with the same name cannot appear in a sane filesystem (that would indicate an ID being
// reused).
if (faccessat(dirFD, entry->d_name, F_OK, 0) < 0 && errno == ENOENT)
continue;
const FhgfsOpsErr streamRes = streamInode(socket, Path(entry->d_name),
S_ISDIR(statData.st_mode));
if (streamRes != FhgfsOpsErr_SUCCESS)
{
numFileErrors.increase();
return streamRes;
}
else
{
numFilesSynced.increase();
}
}
// final packet for this directory, built from an empty tuple
return sendResyncPacket(socket, std::tuple<>());
}

View File

@@ -0,0 +1,67 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/threading/PThread.h>
#include <common/storage/StorageErrors.h>
#include <common/nodes/Node.h>
#include <app/App.h>
#include <components/buddyresyncer/SyncCandidate.h>
#include "SyncSlaveBase.h"
class DirEntry;
/**
 * Sync slave that takes directory candidates from a MetaSyncCandidateStore and streams their
 * contents to the buddy node. Keeps atomic counters of synced directories/files and of
 * directory/file errors.
 */
class BuddyResyncerBulkSyncSlave : public SyncSlaveBase
{
friend class BuddyResyncer;
friend class BuddyResyncJob;
public:
BuddyResyncerBulkSyncSlave(BuddyResyncJob& parentJob, MetaSyncCandidateStore* syncCandidates, uint8_t slaveID,
const NumNodeID& buddyNodeID);
// plain snapshot of the slave's counters
struct Stats
{
uint64_t dirsSynced;
uint64_t filesSynced;
uint64_t dirErrors;
uint64_t fileErrors;
};
// @return the current counter values; each counter is read individually
Stats getStats()
{
return Stats{ numDirsSynced.read(), numFilesSynced.read(),
numDirErrors.read(), numFileErrors.read() };
}
private:
// candidate store to fetch directories from; only the pointer is stored
MetaSyncCandidateStore* syncCandidates;
AtomicUInt64 numDirsSynced;
AtomicUInt64 numFilesSynced;
AtomicUInt64 numDirErrors;
AtomicUInt64 numFileErrors;
void syncLoop();
FhgfsOpsErr resyncDirectory(const MetaSyncCandidateDir& root, const std::string& inodeID);
FhgfsOpsErr streamCandidateDir(Socket& socket, const MetaSyncCandidateDir& candidate,
const std::string& inodeID);
private:
// context tuple for the static trampoline below: (slave, candidate, inode ID), all by reference
typedef std::tuple<
BuddyResyncerBulkSyncSlave&,
const MetaSyncCandidateDir&,
const std::string&> StreamCandidateArgs;
// static trampoline: `context` must point to a StreamCandidateArgs; unpacks it and forwards
// to the member streamCandidateDir()
static FhgfsOpsErr streamCandidateDir(Socket* socket, void* context)
{
using std::get;
auto& args = *(StreamCandidateArgs*) context;
return get<0>(args).streamCandidateDir(*socket, get<1>(args), get<2>(args));
}
};

View File

@@ -0,0 +1,134 @@
#include <common/storage/Metadata.h>
#include <common/app/log/LogContext.h>
#include <common/toolkit/StringTk.h>
#include <toolkit/BuddyCommTk.h>
#include <program/Program.h>
#include "BuddyResyncerGatherSlave.h"
// Construct the gather slave (thread name "BuddyResyncerGatherSlave") in the not-running state.
// The candidate store pointer is only stored; the crawl root is the buddy mirror subdirectory
// below the app's meta path.
BuddyResyncerGatherSlave::BuddyResyncerGatherSlave(MetaSyncCandidateStore* syncCandidates) :
PThread("BuddyResyncerGatherSlave"),
isRunning(false),
syncCandidates(syncCandidates)
{
metaBuddyPath = Program::getApp()->getMetaPath() + "/" CONFIG_BUDDYMIRROR_SUBDIR_NAME;
}
// Thread entry point: marks the slave as running, registers the signal handler, executes
// workLoop() and marks the slave as not running again. Any std::exception escaping the work is
// passed to the app's handleComponentException(); the running flag is cleared in that case too.
void BuddyResyncerGatherSlave::run()
{
setIsRunning(true);
try
{
LOG(MIRRORING, DEBUG, "Component started");
registerSignalHandler();
workLoop();
LOG(MIRRORING, DEBUG, "Component stopped");
}
catch (std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
// reached on both the normal and the exception path
setIsRunning(false);
}
// Crawl the two top-level trees of the buddy mirror directory, one after the other: first the
// inodes tree, then the dentries tree (both starting at the default level 0).
void BuddyResyncerGatherSlave::workLoop()
{
crawlDir(metaBuddyPath + "/" META_INODES_SUBDIR_NAME, MetaSyncDirType::InodesHashDir);
crawlDir(metaBuddyPath + "/" META_DENTRIES_SUBDIR_NAME, MetaSyncDirType::DentriesHashDir);
}
/**
 * Recursively walk one directory of the hash-dir tree and add sync candidates.
 *
 * @param path directory to read
 * @param type tree being crawled (inodes or dentries hash dirs)
 * @param level depth of `path` below the tree root: 0 = tree root, 1 = first-level hash dir,
 *        2 = second-level dentry hash dir
 *
 * Errors (unreadable directory, failed stat, non-directory entry) are logged and counted in
 * numErrors. The loop also ends as soon as self-termination has been requested.
 */
void BuddyResyncerGatherSlave::crawlDir(const std::string& path, const MetaSyncDirType type,
   const unsigned level)
{
   LOG_DBG(MIRRORING, DEBUG, "Entering hash dir.", level, path);

   std::unique_ptr<DIR, StorageTk::CloseDirDeleter> dirHandle(::opendir(path.c_str()));
   if (!dirHandle)
   {
      LOG(MIRRORING, ERR, "Unable to open path", path, sysErr);
      numErrors.increase();
      return;
   }

   while (!getSelfTerminate())
   {
      struct dirent* entry;

#if USE_READDIR_R
      struct dirent buffer;
      const int readRes = ::readdir_r(dirHandle.get(), &buffer, &entry);
#else
      // readdir() returns NULL for both "end of directory" and "error"; errno distinguishes them
      errno = 0;
      entry = ::readdir(dirHandle.get());
      const int readRes = entry ? 0 : errno;
#endif

      if (readRes != 0)
      {
         LOG(MIRRORING, ERR, "Could not read dir entry.", path, sysErr(readRes));
         numErrors.increase();
         return;
      }

      // end of directory
      if (!entry)
         break;

      const bool isDotEntry =
         strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0;
      if (isDotEntry)
         continue;

      const std::string candidatePath = path + "/" + entry->d_name;

      struct stat statBuf;
      if (::stat(candidatePath.c_str(), &statBuf) != 0)
      {
         // in a 2nd level dentry hashdir, content directories may disappear - this is not an error,
         // it was most likely caused by an rmdir issued by a user.
         const bool vanishedContentDir =
            errno == ENOENT && type == MetaSyncDirType::DentriesHashDir && level == 2;

         if (!vanishedContentDir)
         {
            LOG(MIRRORING, ERR, "Could not stat dir entry.", candidatePath, sysErr);
            numErrors.increase();
         }

         continue;
      }

      if (!S_ISDIR(statBuf.st_mode))
      {
         LOG(MIRRORING, ERR, "Found a non-dir where only directories are expected.", candidatePath,
               oct(statBuf.st_mode));
         numErrors.increase();
         continue;
      }

      // layout is: (dentries|inodes)/l1/l2/...
      //  -> level 0 correlates with type
      //  -> level 1 is not very interesting, except for reporting
      //  -> level 2 must be synced. if it is a dentry hashdir, its contents must also be crawled.
      if (level == 0)
      {
         crawlDir(candidatePath, type, level + 1);
      }
      else if (level == 1)
      {
         LOG_DBG(MIRRORING, DEBUG, "Adding hashdir sync candidate.", candidatePath);
         addCandidate(candidatePath, type);

         if (type == MetaSyncDirType::DentriesHashDir)
            crawlDir(candidatePath, type, level + 1);
      }
      else
      {
         // so here we read a 2nd level dentry hashdir. crawl that too, add sync candidates for each
         // entry we find - non-directories have already been reported, and the bulk resyncer will
         // take care of the fsids directories.
         numDirsDiscovered.increase();
         LOG_DBG(MIRRORING, DEBUG, "Adding contdir sync candidate.", candidatePath);
         addCandidate(candidatePath, MetaSyncDirType::ContentDir);
      }
   }
}

View File

@@ -0,0 +1,73 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/threading/PThread.h>
#include <components/buddyresyncer/SyncCandidate.h>
#include <mutex>
/**
 * Thread that crawls the inodes and dentries trees below the buddy mirror directory and adds
 * every directory it has to report to a MetaSyncCandidateStore.
 */
class BuddyResyncerGatherSlave : public PThread
{
// Grant access to internal mutex
friend class BuddyResyncer;
friend class BuddyResyncJob;
public:
/**
 * @param syncCandidates store that receives the discovered directories; only the pointer is kept
 */
BuddyResyncerGatherSlave(MetaSyncCandidateStore* syncCandidates);
// crawls the inodes tree, then the dentries tree
void workLoop();
private:
Mutex stateMutex; // taken by getIsRunning() and setIsRunning()
Condition isRunningChangeCond; // broadcast by setIsRunning() on every call
AtomicUInt64 numDirsDiscovered; // increased by crawlDir() for each content dir candidate
AtomicUInt64 numErrors; // increased by crawlDir() for each logged error
std::string metaBuddyPath; // meta path + "/" + buddy mirror subdir name, set by the constructor
bool isRunning;
MetaSyncCandidateStore* syncCandidates;
// thread entry point; runs workLoop() between setIsRunning(true) and setIsRunning(false)
virtual void run();
// level is the depth of path below the tree root; callers outside the recursion use the default
void crawlDir(const std::string& path, const MetaSyncDirType type, const unsigned level = 0);
public:
// reads the running flag under stateMutex
bool getIsRunning()
{
std::lock_guard<Mutex> lock(stateMutex);
return this->isRunning;
}
// snapshot of the two counters; each value is read separately
struct Stats
{
uint64_t dirsDiscovered;
uint64_t errors;
};
Stats getStats()
{
return Stats{ numDirsDiscovered.read(), numErrors.read() };
}
private:
// updates the running flag under stateMutex and wakes everybody waiting on the condition
void setIsRunning(const bool isRunning)
{
std::lock_guard<Mutex> lock(stateMutex);
this->isRunning = isRunning;
isRunningChangeCond.broadcast();
}
// path must start with metaBuddyPath followed by one separator character: that prefix is cut
// off, so the candidate is stored with a path relative to the buddy mirror directory.
void addCandidate(const std::string& path, const MetaSyncDirType type)
{
const std::string& relPath = path.substr(metaBuddyPath.size() + 1);
syncCandidates->add(MetaSyncCandidateDir(relPath, type), this);
}
};
typedef std::vector<BuddyResyncerGatherSlave*> BuddyResyncerGatherSlaveVec;
typedef BuddyResyncerGatherSlaveVec::iterator BuddyResyncerGatherSlaveVecIter;

View File

@@ -0,0 +1,142 @@
#include "BuddyResyncerModSyncSlave.h"
#include <common/net/message/storage/mirroring/ResyncRawInodesRespMsg.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/DebugVariable.h>
#include <common/Common.h>
#include <net/message/storage/mirroring/ResyncRawInodesMsgEx.h>
#include <net/msghelpers/MsgHelperXAttr.h>
#include <program/Program.h>
#include <toolkit/XAttrTk.h>
/**
 * @param parentJob resync job this slave belongs to
 * @param syncCandidates store the modification candidates are fetched from; only the pointer is
 *    kept
 * @param slaveID appended to the thread name
 * @param buddyNodeID node the modifications are streamed to
 */
BuddyResyncerModSyncSlave::BuddyResyncerModSyncSlave(BuddyResyncJob& parentJob,
   MetaSyncCandidateStore* syncCandidates, uint8_t slaveID, const NumNodeID& buddyNodeID) :
   SyncSlaveBase(
      "BuddyResyncerModSyncSlave_" + StringTk::uintToStr(slaveID),
      parentJob,
      buddyNodeID),
   syncCandidates(syncCandidates)
{
   // nothing else to do
}
/**
 * Waits for modification candidates and opens one resync stream per batch. Ends when
 * getSelfTerminateNotIdle() reports true, or when no files arrived while
 * "only terminate if idle" is set.
 */
void BuddyResyncerModSyncSlave::syncLoop()
{
   for (;;)
   {
      if (getSelfTerminateNotIdle())
         return;

      if (syncCandidates->waitForFiles(this))
      {
         // the result is not evaluated here; resyncAt logs failures itself
         resyncAt(Path(), false, streamCandidates, this);
         continue;
      }

      if (getOnlyTerminateIfIdle())
         return;
   }
}
namespace {

/**
 * Deleter for a std::unique_ptr used as a scope guard: it does not free the candidate, it signals
 * it when the guard goes out of scope.
 */
struct CandidateSignaler
{
   void operator()(MetaSyncCandidateFile* candidate) const
   {
      candidate->signal();
   }
};

/**
 * Ordering for the elements of one changeset, usable with std::sort.
 *
 * Sort keys, most significant first:
 *  1. deletions before updates
 *  2. inodes before all other types
 *  3. type, then path (only to make the order total and deterministic)
 *
 * Each key is compared symmetrically, so this is a strict weak ordering. The previous
 * implementation tested the first two keys in one direction only, so for a deleted dentry and an
 * updated inode both cmp(a, b) and cmp(b, a) were true - which is undefined behaviour in
 * std::sort - and an update could be sorted before a deletion.
 *
 * @return true if a must be synced before b
 */
bool resyncElemCmp(const MetaSyncCandidateFile::Element& a, const MetaSyncCandidateFile::Element& b)
{
   // we must sync deletions before updates and inodes before everything else:
   //
   // deletions may fail on the secondary, so they *can* be synced first to begin with.
   // any item that is deleted and then recreated with an update must be deleted first.
   // we also guarantee that no item is created and deleted in the same changeset.
   //
   // inodes must be synced before dentries because the dentries may link to inodes in the same
   // changeset - and if the secondary does not have the appropriate inode yet, the changeset
   // must create it.
   if (a.isDeletion != b.isDeletion)
      return a.isDeletion;

   const bool aIsInode = (a.type == MetaSyncFileType::Inode);
   const bool bIsInode = (b.type == MetaSyncFileType::Inode);

   if (aIsInode != bIsInode)
      return aIsInode;

   const int aType = int(a.type);
   const int bType = int(b.type);

   if (aType != bType)
      return aType < bType;

   return a.path < b.path;
}

}
/**
 * Stream callback: fetches file candidates from the store and sends each of their modifications
 * over the socket until the store is empty or termination is requested.
 *
 * The first failed modification is logged and counted, the parent job is aborted and the stream
 * is closed with an empty packet.
 *
 * @return FhgfsOpsErr_SUCCESS, also after a failed modification; FhgfsOpsErr_INTERNAL only for
 *    an element with an unknown type.
 */
FhgfsOpsErr BuddyResyncerModSyncSlave::streamCandidates(Socket& socket)
{
   DEBUG_ENV_VAR(unsigned, DEBUG_FAIL_MODSYNC, 0, "BEEGFS_DEBUG_FAIL_MODSYNC");

   // element.path is relative to the meta root, so we have to chop off the buddymir/ prefix
   const size_t mirrorPrefixLen = strlen(META_BUDDYMIRROR_SUBDIR_NAME) + 1;

   while (!getSelfTerminateNotIdle() && !syncCandidates->isFilesEmpty())
   {
      MetaSyncCandidateFile candidate;
      syncCandidates->fetch(candidate, this);

      // signal the candidate at the end of this loop iteration.
      // a scope guard is used because there are a few exit points and exceptions to take into
      // account.
      std::unique_ptr<MetaSyncCandidateFile, CandidateSignaler> signaler(&candidate);

      auto resyncElems = candidate.releaseElements();
      std::sort(resyncElems.begin(), resyncElems.end(), resyncElemCmp);

      for (const auto& element : resyncElems)
      {
         const Path itemPath(element.path.substr(mirrorPrefixLen));
         FhgfsOpsErr resyncRes;

         LOG_DBG(MIRRORING, DEBUG, "Syncing one modification.", element.path, element.isDeletion,
               int(element.type));

         switch (element.type)
         {
            case MetaSyncFileType::Dentry:
            {
               if (element.isDeletion)
                  resyncRes = deleteDentry(socket, itemPath.dirname(), itemPath.back());
               else
                  resyncRes = streamDentry(socket, itemPath.dirname(), itemPath.back());

               break;
            }

            case MetaSyncFileType::Directory:
            case MetaSyncFileType::Inode:
            {
               const bool isDirectory = (element.type == MetaSyncFileType::Directory);

               if (element.isDeletion)
                  resyncRes = deleteInode(socket, itemPath, isDirectory);
               else
                  resyncRes = streamInode(socket, itemPath, isDirectory);

               break;
            }

            default:
               LOG(MIRRORING, ERR, "this should never happen");
               return FhgfsOpsErr_INTERNAL;
         }

         if (resyncRes == FhgfsOpsErr_SUCCESS && !DEBUG_FAIL_MODSYNC)
         {
            numObjectsSynced.increase();
            continue;
         }

         LOG(MIRRORING, ERR, "Modification resync failed.", element.path, element.isDeletion,
               resyncRes);
         numErrors.increase();

         // Since this error prevents the resync from reaching a GOOD state on the secondary,
         // we abort here.
         parentJob->abort(true);

         // terminate the current stream, start a new one if necessary. we could (in theory)
         // reuse the current stream, but terminating a stream that has seen an error is simpler
         // to handle than keeping it open. also, bulk resync would like "fail on error"
         // semantics very much.
         sendResyncPacket(socket, std::tuple<>());
         return FhgfsOpsErr_SUCCESS;
      }
   }

   // regular end of stream
   sendResyncPacket(socket, std::tuple<>());
   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,50 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/threading/PThread.h>
#include <common/storage/StorageErrors.h>
#include <common/nodes/Node.h>
#include <app/App.h>
#include <components/buddyresyncer/SyncCandidate.h>
#include "SyncSlaveBase.h"
class DirEntry;
/**
 * Sync slave that streams individual modifications (file candidates of the candidate store) to
 * the buddy node.
 */
class BuddyResyncerModSyncSlave : public SyncSlaveBase
{
   friend class BuddyResyncer;
   friend class BuddyResyncJob;

   public:
      BuddyResyncerModSyncSlave(BuddyResyncJob& parentJob, MetaSyncCandidateStore* syncCandidates,
         uint8_t slaveID, const NumNodeID& buddyNodeID);

      struct Stats
      {
         uint64_t objectsSynced;
         uint64_t errors;
      };

      /**
       * @return snapshot of the counters; the two values are read one after the other
       */
      Stats getStats()
      {
         Stats snapshot;
         snapshot.objectsSynced = numObjectsSynced.read();
         snapshot.errors = numErrors.read();

         return snapshot;
      }

   private:
      MetaSyncCandidateStore* syncCandidates;

      AtomicUInt64 numObjectsSynced;
      AtomicUInt64 numErrors;

      void syncLoop();

      FhgfsOpsErr streamCandidates(Socket& socket);

   private:
      /**
       * Plain-function adapter for the stream hook.
       *
       * @param context the BuddyResyncerModSyncSlave instance to forward to
       */
      static FhgfsOpsErr streamCandidates(Socket* socket, void* context)
      {
         BuddyResyncerModSyncSlave* self = static_cast<BuddyResyncerModSyncSlave*>(context);
         return self->streamCandidates(*socket);
      }
};

View File

@@ -0,0 +1,59 @@
#include "SessionStoreResyncer.h"
#include <common/toolkit/MessagingTk.h>
#include <common/net/message/storage/mirroring/ResyncSessionStoreMsg.h>
#include <common/net/message/storage/mirroring/ResyncSessionStoreRespMsg.h>
#include <common/toolkit/StringTk.h>
#include <program/Program.h>
#include <app/App.h>
#include <boost/scoped_array.hpp>
/**
 * @param buddyNodeID node the session store is sent to by doSync()
 */
SessionStoreResyncer::SessionStoreResyncer(const NumNodeID& buddyNodeID) :
   buddyNodeID(buddyNodeID)
{
}
void SessionStoreResyncer::doSync()
{
App* app = Program::getApp();
SessionStore* sessions = app->getMirroredSessions();
NodeStoreServers* metaNodes = app->getMetaNodes();
const uint64_t numSessions = sessions->getSize();
numSessionsToSync.set(numSessions);
// Serialize sessions store into buffer
std::pair<std::unique_ptr<char[]>, size_t> sessionStoreSerBuf = sessions->serializeToBuf();
if (sessionStoreSerBuf.second == 0)
{
// Serialization failed.
errors.set(1);
return;
}
LOG(MIRRORING, DEBUG, "Serialized session store", ("size", sessionStoreSerBuf.second));
ResyncSessionStoreMsg msg(sessionStoreSerBuf.first.get(), sessionStoreSerBuf.second);
RequestResponseArgs rrArgs(NULL, &msg, NETMSGTYPE_ResyncSessionStoreResp);
RequestResponseNode rrNode(buddyNodeID, metaNodes);
msg.registerStreamoutHook(rrArgs);
FhgfsOpsErr requestRes = MessagingTk::requestResponseNode(&rrNode, &rrArgs);
if (requestRes != FhgfsOpsErr_SUCCESS)
{
errors.set(1);
LOG(MIRRORING, ERR, "Request failed.", requestRes);
return;
}
ResyncSessionStoreRespMsg* resp = (ResyncSessionStoreRespMsg*)rrArgs.outRespMsg.get();
FhgfsOpsErr retVal = resp->getResult();
LOG(MIRRORING, DEBUG, "ResyncSessionStoreRespMsg", retVal);
if (retVal != FhgfsOpsErr_SUCCESS)
errors.set(1);
else
numSessionsSynced.set(numSessions);
}

View File

@@ -0,0 +1,35 @@
#pragma once
#include <common/nodes/Node.h>
#include <common/threading/PThread.h>
/**
 * Transfers the mirrored session store to the buddy node and keeps counters about the outcome.
 */
class SessionStoreResyncer
{
   friend class BuddyResyncer;
   friend class BuddyResyncJob;

   public:
      SessionStoreResyncer(const NumNodeID& buddyNodeID);

      struct Stats
      {
         uint64_t sessionsToSync;
         uint64_t sessionsSynced;
         bool errors;
      };

      /**
       * @return snapshot of the counters; the values are read one after the other
       */
      Stats getStats()
      {
         Stats snapshot;
         snapshot.sessionsToSync = numSessionsToSync.read();
         snapshot.sessionsSynced = numSessionsSynced.read();
         snapshot.errors = (errors.read() != 0);

         return snapshot;
      }

   private:
      NumNodeID buddyNodeID;

      AtomicUInt64 numSessionsToSync;
      AtomicUInt64 numSessionsSynced;
      AtomicSizeT errors; // 0 / 1

      void doSync();
};

View File

@@ -0,0 +1,113 @@
#pragma once
#include <common/toolkit/serialization/Serialization.h>
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/threading/Barrier.h>
#include <string>
// Kind of directory described by a MetaSyncCandidateDir.
enum class MetaSyncDirType
{
InodesHashDir, // hash dir found while crawling the inodes tree
DentriesHashDir, // hash dir found while crawling the dentries tree
ContentDir, // directory found inside a 2nd level dentries hash dir
};
// NOTE(review): presumably supplies operator== / operator!= for compilers that lack them for
// scoped enums - confirm against the macro definition.
GCC_COMPAT_ENUM_CLASS_OPEQNEQ(MetaSyncDirType)
/**
 * A directory that has to be resynced: its path relative to the buddy mirror directory plus the
 * kind of directory.
 */
class MetaSyncCandidateDir
{
   public:
      MetaSyncCandidateDir(const std::string& relativePath, MetaSyncDirType type):
         relPath(relativePath), type(type)
      {}

      /**
       * Creates an empty candidate: empty path, type InodesHashDir.
       */
      MetaSyncCandidateDir() = default;

   private:
      std::string relPath;

      // initialized here so that getType() on a default-constructed candidate does not read an
      // indeterminate value
      MetaSyncDirType type = MetaSyncDirType::InodesHashDir;

   public:
      const std::string& getRelativePath() const { return relPath; }
      MetaSyncDirType getType() const { return type; }
};
// Kind of object described by one MetaSyncCandidateFile element.
// The numeric order of the enumerators is relevant: resyncElemCmp compares int(type) when it
// sorts the elements of a changeset.
enum class MetaSyncFileType
{
Inode,
Dentry,
Directory,
};
// NOTE(review): presumably supplies operator== / operator!= for compilers that lack them for
// scoped enums - confirm against the macro definition.
GCC_COMPAT_ENUM_CLASS_OPEQNEQ(MetaSyncFileType)
// NOTE(review): looks like this makes the serializer transfer the enum as a uint8_t - confirm
// against the SerializeAs primary template.
template<>
struct SerializeAs<MetaSyncFileType> {
typedef uint8_t type;
};
/**
 * A batch of file modifications/deletions that has to be resynced, plus an optional barrier that
 * is used to tell the producer that the batch has been handled.
 *
 * Move-only. Default-constructed and moved-from objects have no elements and no barrier.
 */
class MetaSyncCandidateFile
{
   public:
      struct Element
      {
         std::string path;
         MetaSyncFileType type;
         bool isDeletion;
      };

      MetaSyncCandidateFile(): barrier(nullptr) {}

      MetaSyncCandidateFile(MetaSyncCandidateFile&& src) noexcept:
         barrier(nullptr)
      {
         swap(src);
      }

      MetaSyncCandidateFile& operator=(MetaSyncCandidateFile&& other) noexcept
      {
         // move-and-swap: the previous contents of *this are destroyed with the temporary
         MetaSyncCandidateFile(std::move(other)).swap(*this);
         return *this;
      }

      void swap(MetaSyncCandidateFile& other) noexcept
      {
         paths.swap(other.paths);
         std::swap(barrier, other.barrier);
      }

      /**
       * Waits on the barrier set by prepareSignal(). Does nothing if no barrier is attached
       * (default-constructed or moved-from candidate); this used to dereference a null pointer.
       */
      void signal()
      {
         if (barrier)
            barrier->wait();
      }

      friend void swap(MetaSyncCandidateFile& a, MetaSyncCandidateFile& b) noexcept
      {
         a.swap(b);
      }

   private:
      std::vector<Element> paths;
      Barrier* barrier; // not owned; must outlive the call to signal()

   public:
      const std::vector<Element>& getElements() const { return paths; }

      /**
       * Hands the elements over to the caller; the barrier stays attached to this object.
       */
      std::vector<Element> releaseElements() { return std::move(paths); }

      void addModification(std::string path, MetaSyncFileType type)
      {
         paths.push_back(Element{std::move(path), type, false});
      }

      void addDeletion(std::string path, MetaSyncFileType type)
      {
         paths.push_back(Element{std::move(path), type, true});
      }

      /**
       * @param barrier barrier to wait on in signal(); only its address is stored
       */
      void prepareSignal(Barrier& barrier)
      {
         this->barrier = &barrier;
      }
};
// candidate store specialised for the meta server's directory and file candidates
using MetaSyncCandidateStore = SyncCandidateStore<MetaSyncCandidateDir, MetaSyncCandidateFile>;

View File

@@ -0,0 +1,249 @@
#include "SyncSlaveBase.h"
#include <common/net/message/storage/mirroring/ResyncRawInodesRespMsg.h>
#include <net/message/storage/mirroring/ResyncRawInodesMsgEx.h>
#include <net/msghelpers/MsgHelperXAttr.h>
#include <program/Program.h>
#include <toolkit/XAttrTk.h>
/**
 * Thread entry point: flags the slave as running, executes syncLoop() and clears the running flag
 * again, also when syncLoop() ended with an exception.
 */
void SyncSlaveBase::run()
{
   setIsRunning(true);

   try
   {
      LOG(MIRRORING, DEBUG, "Component started");

      registerSignalHandler();
      syncLoop();

      LOG(MIRRORING, DEBUG, "Component stopped");
   }
   catch (std::exception& componentError)
   {
      // hand the failure to the application instead of letting it escape the thread
      PThread::getCurrentThreadApp()->handleComponentException(componentError);
   }

   setIsRunning(false);
}
/**
 * Receives one message from the socket and interprets it as the acknowledgement of a resync
 * packet.
 *
 * @return FhgfsOpsErr_INTERNAL if nothing was received, FhgfsOpsErr_COMMUNICATION if the message
 *    is not a ResyncRawInodesResp, otherwise the result carried by the response.
 */
FhgfsOpsErr SyncSlaveBase::receiveAck(Socket& socket)
{
   auto rawReply = MessagingTk::recvMsgBuf(socket);
   if (rawReply.empty())
      return FhgfsOpsErr_INTERNAL;

   const auto ackMsg = PThread::getCurrentThreadApp()->getNetMessageFactory()->createFromBuf(
      std::move(rawReply));

   if (ackMsg->getMsgType() == NETMSGTYPE_ResyncRawInodesResp)
      return static_cast<ResyncRawInodesRespMsg&>(*ackMsg).getResult();

   return FhgfsOpsErr_COMMUNICATION;
}
/**
 * Opens a resync stream to the buddy node for basePath and lets streamFn produce its contents.
 *
 * Side effect: the member basePath is set to the buddy mirror subdir joined with the basePath
 * argument, which is what the stream/delete helpers resolve relative paths against.
 *
 * @param basePath path the stream refers to
 * @param wholeDirectory passed on to the request message
 * @param streamFn callback that writes the stream; receives context as second argument
 * @param context opaque pointer for streamFn
 * @return the communication error, or the result reported by the secondary
 */
FhgfsOpsErr SyncSlaveBase::resyncAt(const Path& basePath, bool wholeDirectory,
   FhgfsOpsErr (*streamFn)(Socket*, void*), void* context)
{
   auto* app = Program::getApp();
   const bool sendXAttrs = app->getConfig()->getStoreClientXAttrs();

   this->basePath = META_BUDDYMIRROR_SUBDIR_NAME / basePath;

   ResyncRawInodesMsgEx msg(basePath, sendXAttrs, wholeDirectory);

   RequestResponseNode rrNode(buddyNodeID, app->getMetaNodes());
   RequestResponseArgs rrArgs(nullptr, &msg, NETMSGTYPE_ResyncRawInodesResp, streamFn, context);

   // resync processing may take a very long time for each step, eg if a very large directory must
   // be cleaned out on the secondary. do not use timeouts for resync communication right now.
   rrArgs.minTimeoutMS = -1;

   const auto commRes = MessagingTk::requestResponseNode(&rrNode, &rrArgs);
   if (commRes != FhgfsOpsErr_SUCCESS)
   {
      LOG(MIRRORING, ERR, "Error during communication with secondary.", commRes);
      return commRes;
   }

   const auto resyncRes = static_cast<ResyncRawInodesRespMsg&>(*rrArgs.outRespMsg).getResult();
   if (resyncRes != FhgfsOpsErr_SUCCESS)
   {
      LOG(MIRRORING, ERR, "Error while resyncing directory.", basePath, resyncRes);
   }

   return resyncRes;
}
/**
 * Sends one dentry to the secondary and waits for the acknowledgement.
 *
 * A dentry with an inlined inode is sent as a link (path + entry id); every other dentry is sent
 * with its serialized content.
 *
 * @param contDirRelPath directory containing the dentry, relative to the current base path
 * @param name name of the dentry
 * @return FhgfsOpsErr_INTERNAL if the dentry cannot be loaded or serialized, the send error, or
 *    the result of the acknowledgement
 */
FhgfsOpsErr SyncSlaveBase::streamDentry(Socket& socket, const Path& contDirRelPath,
   const std::string& name)
{
   std::unique_ptr<DirEntry> dentry(
      DirEntry::createFromFile((basePath / contDirRelPath).str(), name));

   if (!dentry)
   {
      LOG(MIRRORING, ERR, "Could not open dentry.", basePath, contDirRelPath, name);
      return FhgfsOpsErr_INTERNAL;
   }

   const std::string dentryRelPath = (contDirRelPath / name).str();

   if (dentry->getIsInodeInlined())
   {
      const FhgfsOpsErr linkSendRes = sendResyncPacket(socket, LinkDentryInfo(
            MetaSyncFileType::Dentry,
            dentryRelPath,
            true,
            dentry->getID(),
            false));

      return (linkSendRes == FhgfsOpsErr_SUCCESS) ? receiveAck(socket) : linkSendRes;
   }

   std::vector<char> dentryContent;

   {
      // first pass without a buffer to learn the size, second pass writes into the buffer
      Serializer ser;
      dentry->serializeDentry(ser);

      dentryContent.resize(ser.size());

      ser = Serializer(&dentryContent[0], dentryContent.size());
      dentry->serializeDentry(ser);

      if (!ser.good())
      {
         LOG(MIRRORING, ERR, "Could not serialize dentry for secondary.");
         return FhgfsOpsErr_INTERNAL;
      }
   }

   const FhgfsOpsErr sendRes = sendResyncPacket(socket, FullDentryInfo(
         MetaSyncFileType::Dentry,
         dentryRelPath,
         false,
         dentryContent,
         false));

   return (sendRes == FhgfsOpsErr_SUCCESS) ? receiveAck(socket) : sendRes;
}
/**
 * Sends one inode (or directory) to the secondary and waits for the acknowledgement.
 *
 * For non-directories the raw metadata attributes are read and sent along; for directories the
 * attribute map stays empty. If client xattrs are enabled in the config, the user xattrs are
 * streamed after the inode packet.
 *
 * @param inodeRelPath path of the inode relative to the current base path
 * @param isDirectory selects the Directory or Inode file type
 * @return the first read/list/send error, FhgfsOpsErr_INTERNAL for xattr failures, or the result
 *    of the acknowledgement
 */
FhgfsOpsErr SyncSlaveBase::streamInode(Socket& socket, const Path& inodeRelPath,
   const bool isDirectory)
{
   const Path fullPath(basePath / inodeRelPath);

   MetaStore& store = *Program::getApp()->getMetaStore();

   // Map to store attribute name and its data
   std::map<std::string, std::vector<char>> contents;

   if (!isDirectory)
   {
      // reads one attribute and, on success, stores it in the map. returns the read result.
      auto readAttrInto = [&](const std::string& attrName) -> FhgfsOpsErr
      {
         std::vector<char> attrData;

         const FhgfsOpsErr attrReadRes =
            store.getRawMetadata(fullPath, attrName.c_str(), attrData);

         if (attrReadRes == FhgfsOpsErr_SUCCESS)
            contents.insert(std::make_pair(attrName, std::move(attrData)));

         return attrReadRes;
      };

      // Handle META_XATTR_NAME ("user.fhgfs") separately because it can be stored as either
      // file contents or an extended attribute, depending on the 'storeUseExtendedAttribs'
      // configuration setting in the meta config. In contrast, all other metadata-specific
      // attributes are strictly stored as extended attributes and do not have the option to
      // be stored as file contents.
      const FhgfsOpsErr mainAttrRes = readAttrInto(META_XATTR_NAME);
      if (mainAttrRes != FhgfsOpsErr_SUCCESS)
         return mainAttrRes;

      // Now handle all remaining metadata attributes
      const auto listedAttrs = XAttrTk::listXAttrs(fullPath.str());
      if (listedAttrs.first != FhgfsOpsErr_SUCCESS)
         return listedAttrs.first;

      for (const auto& attrName : listedAttrs.second)
      {
         // only attributes that exist on the inode, are part of METADATA_XATTR_NAME_LIST and are
         // not META_XATTR_NAME (already handled above) are processed.
         if (attrName == META_XATTR_NAME)
            continue;

         const bool isKnownMetaAttr =
            std::find(METADATA_XATTR_NAME_LIST.begin(), METADATA_XATTR_NAME_LIST.end(), attrName)
               != METADATA_XATTR_NAME_LIST.end();

         if (!isKnownMetaAttr)
            continue;

         const FhgfsOpsErr attrRes = readAttrInto(attrName);
         if (attrRes != FhgfsOpsErr_SUCCESS)
            return attrRes;
      }
   }

   const MetaSyncFileType fileType =
      isDirectory ? MetaSyncFileType::Directory : MetaSyncFileType::Inode;

   const FhgfsOpsErr sendRes = sendResyncPacket(socket, InodeInfo(
         fileType,
         inodeRelPath.str(),
         contents,
         false));

   if (sendRes != FhgfsOpsErr_SUCCESS)
      return sendRes;

   if (Program::getApp()->getConfig()->getStoreClientXAttrs())
   {
      auto xattrs = XAttrTk::listUserXAttrs(fullPath.str());
      if (xattrs.first != FhgfsOpsErr_SUCCESS)
      {
         LOG(MIRRORING, ERR, "Could not list resync candidate xattrs.", fullPath, ("error", xattrs.first));
         return FhgfsOpsErr_INTERNAL;
      }

      MsgHelperXAttr::StreamXAttrState state(fullPath.str(), std::move(xattrs.second));

      const FhgfsOpsErr xattrRes = MsgHelperXAttr::StreamXAttrState::streamXattrFn(&socket, &state);
      if (xattrRes != FhgfsOpsErr_SUCCESS)
      {
         LOG(MIRRORING, ERR, "Error while sending xattrs to secondary.", fullPath, xattrRes);
         return FhgfsOpsErr_INTERNAL;
      }
   }

   return receiveAck(socket);
}
/**
 * Tells the secondary to delete one dentry and waits for the acknowledgement.
 *
 * @param contDirRelPath directory containing the dentry, relative to the current base path
 * @param name name of the dentry
 * @return the send error, or the result of the acknowledgement
 */
FhgfsOpsErr SyncSlaveBase::deleteDentry(Socket& socket, const Path& contDirRelPath,
   const std::string& name)
{
   // deletions carry no link target, so the entry id field stays empty
   const FhgfsOpsErr sendRes = sendResyncPacket(socket, LinkDentryInfo(
         MetaSyncFileType::Dentry,
         (contDirRelPath / name).str(),
         true,
         {},
         true));

   if (sendRes == FhgfsOpsErr_SUCCESS)
      return receiveAck(socket);

   return sendRes;
}
/**
 * Tells the secondary to delete one inode (or directory) and waits for the acknowledgement.
 *
 * @param inodeRelPath path of the inode relative to the current base path
 * @param isDirectory selects the Directory or Inode file type
 * @return the send error, or the result of the acknowledgement
 */
FhgfsOpsErr SyncSlaveBase::deleteInode(Socket& socket, const Path& inodeRelPath,
   const bool isDirectory)
{
   const MetaSyncFileType fileType =
      isDirectory ? MetaSyncFileType::Directory : MetaSyncFileType::Inode;

   // deletions carry no attribute contents, so the map stays empty
   const FhgfsOpsErr sendRes = sendResyncPacket(socket, InodeInfo(
         fileType,
         inodeRelPath.str(),
         {},
         true));

   if (sendRes == FhgfsOpsErr_SUCCESS)
      return receiveAck(socket);

   return sendRes;
}

View File

@@ -0,0 +1,129 @@
#pragma once
#include <common/net/sock/Socket.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>
#include <app/App.h>
class DirEntry;
/**
 * Base class of the resync slave threads: provides the run() wrapper around syncLoop(), the
 * running flag, and the helpers that send dentries/inodes (or their deletion) to the buddy node.
 */
class SyncSlaveBase : public PThread
{
public:
// reads the running flag under stateMutex
bool getIsRunning()
{
std::lock_guard<Mutex> lock(stateMutex);
return this->isRunning;
}
// while set, getSelfTerminateNotIdle() reports false even if self-termination was requested
void setOnlyTerminateIfIdle(bool value)
{
onlyTerminateIfIdle.set(value);
}
bool getOnlyTerminateIfIdle()
{
return onlyTerminateIfIdle.read();
}
protected:
BuddyResyncJob* parentJob; // address of the job passed to the constructor
NumNodeID buddyNodeID; // node the resync requests are sent to
Mutex stateMutex; // taken by getIsRunning() and setIsRunning()
Condition isRunningChangeCond; // broadcast by setIsRunning() on every call
AtomicSizeT onlyTerminateIfIdle; // used as a boolean flag
bool isRunning;
Path basePath; // set by resyncAt(); the stream helpers resolve relative paths against it
SyncSlaveBase(const std::string& threadName, BuddyResyncJob& parentJob,
const NumNodeID buddyNodeID):
PThread(threadName), parentJob(&parentJob), buddyNodeID(buddyNodeID), isRunning(false)
{
}
// thread entry point; runs syncLoop() between setIsRunning(true) and setIsRunning(false)
virtual void run() override;
// the actual work of the slave, implemented by subclasses
virtual void syncLoop() = 0;
// opens a resync stream for basePath; streamFn is called with context to produce the contents
FhgfsOpsErr resyncAt(const Path& basePath, bool wholeDirectory,
FhgfsOpsErr (*streamFn)(Socket*, void*), void* context);
// each of the following sends one packet and then waits for the acknowledgement
FhgfsOpsErr streamDentry(Socket& socket, const Path& contDirRelPath, const std::string& name);
FhgfsOpsErr streamInode(Socket& socket, const Path& inodeRelPath, const bool isDirectory);
FhgfsOpsErr deleteDentry(Socket& socket, const Path& contDirRelPath, const std::string& name);
FhgfsOpsErr deleteInode(Socket& socket, const Path& inodeRelPath, const bool isDirectory);
// updates the running flag under stateMutex and wakes everybody waiting on the condition
void setIsRunning(bool isRunning)
{
std::lock_guard<Mutex> lock(stateMutex);
this->isRunning = isRunning;
isRunningChangeCond.broadcast();
}
// true if self-termination was requested and "only terminate if idle" is not set
bool getSelfTerminateNotIdle()
{
return getSelfTerminate() && !getOnlyTerminateIfIdle();
}
/**
 * Serializes value with a uint32_t size prefix and sends it over the socket.
 *
 * @return FhgfsOpsErr_OUTOFMEM if the buffer cannot be allocated, FhgfsOpsErr_INTERNAL if
 * serialization fails, FhgfsOpsErr_SUCCESS otherwise.
 */
template<typename ValueT>
static FhgfsOpsErr sendResyncPacket(Socket& socket, const ValueT& value)
{
// first pass: serialize without a buffer, only to learn the payload size
Serializer ser;
ser % value;
const unsigned packetSize = ser.size();
const unsigned totalSize = packetSize + sizeof(uint32_t);
const std::tuple<uint32_t, const ValueT&> packet(packetSize, value);
std::unique_ptr<char[]> buffer(new (std::nothrow) char[totalSize]);
if (!buffer)
{
LOG(MIRRORING, ERR, "Could not allocate memory for resync packet.");
return FhgfsOpsErr_OUTOFMEM;
}
// second pass: write size prefix and payload into the buffer
ser = {buffer.get(), totalSize};
ser % packet;
if (!ser.good())
{
LOG(MIRRORING, ERR, "Serialization of resync packet failed.");
return FhgfsOpsErr_INTERNAL;
}
// the return value of send() is not checked here
// NOTE(review): presumably send() reports failures by throwing - confirm against Socket
socket.send(buffer.get(), totalSize, 0);
return FhgfsOpsErr_SUCCESS;
}
// receives one message and returns the result it carries, see the definition for error codes
static FhgfsOpsErr receiveAck(Socket& socket);
private:
// Packet layouts. The tuples hold references, so an instance must not outlive the expression
// that provides the referenced values.
typedef std::tuple<
MetaSyncFileType,
const std::string&, // relative path
bool, // is hardlink?
const std::string&, // link target entry id
bool // is deletion?
> LinkDentryInfo;
typedef std::tuple<
MetaSyncFileType,
const std::string&, // relative path
bool, // is hardlink?
const std::vector<char>&, // dentry raw content
bool // is deletion?
> FullDentryInfo;
typedef std::tuple<
MetaSyncFileType,
const std::string&, // relative path
std::map<std::string, std::vector<char>>, // metadata specific attribute's raw contents
bool // is deletion?
> InodeInfo;
};

View File

@@ -0,0 +1,38 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/threading/Atomics.h>
#include <common/threading/Condition.h>
#include <common/app/log/Logger.h>
/**
 * Work item intended to stop all worker threads temporarily: each worker that processes it waits
 * on the barrier twice - the first wait tells the coordinator that the worker is stopped, the
 * second wait is released when the coordinator restarts the workers via the same barrier.
 *
 * Example:
 *    Barrier workerBarrier(numWorkers + 1);
 *    <insert instance of BarrierWork(&workerBarrier) into personal queue of numWorkers threads>
 *    workerBarrier.wait(); // Wait for all workers to stop
 *    <do something while workers are stopped>
 *    workerBarrier.wait(); // restart the workers
 */
class BarrierWork : public Work
{
   public:
      /**
       * @param barrier barrier shared with the coordinating thread; only the pointer is stored
       */
      BarrierWork(Barrier* barrier) : barrier(barrier) {}

      virtual ~BarrierWork() {}

      /**
       * Blocks the calling worker until the barrier has been passed twice. The buffers are unused.
       */
      void process(char*, unsigned, char*, unsigned)
      {
         LOG_DBG(WORKQUEUES, DEBUG, "Start blocking.");

         // phase 0: report "stopped", phase 1: wait for the restart
         for (int phase = 0; phase < 2; ++phase)
            barrier->wait();

         LOG_DBG(WORKQUEUES, DEBUG, "Done.");
      }

   private:
      Barrier* barrier;
};

View File

@@ -0,0 +1,114 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/session/opening/CloseChunkFileMsg.h>
#include <common/net/message/session/opening/CloseChunkFileRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <program/Program.h>
#include "CloseChunkFileWork.h"
#include <boost/lexical_cast.hpp>
void CloseChunkFileWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Sends a CloseChunkFileMsg to the storage target and evaluates the response.
 *
 * If outDynAttribs is set, it is filled from the response whenever a response was received, also
 * if the target reported an error.
 *
 * @return the communication error, the error reported by the target, or FhgfsOpsErr_SUCCESS
 */
FhgfsOpsErr CloseChunkFileWork::communicate()
{
   const char* logContext = "Close chunk file work";

   App* app = Program::getApp();

   const bool isBuddyMirrored = (pattern->getPatternType() == StripePatternType_BuddyMirror);

   // prepare request message

   CloseChunkFileMsg closeMsg(sessionID, fileHandleID, targetID, pathInfoPtr);

   if (isBuddyMirrored)
   {
      closeMsg.addMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR);

      if (useBuddyMirrorSecond)
      {
         closeMsg.addMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_NODYNAMICATTRIBS);
         closeMsg.addMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND);
      }
   }

   closeMsg.setMsgHeaderUserID(msgUserID);

   // prepare communication

   RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );
   rrTarget.setTargetStates(app->getTargetStateStore() );

   if (isBuddyMirrored)
      rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);

   RequestResponseArgs rrArgs(nullptr, &closeMsg, NETMSGTYPE_CloseChunkFileResp);

   // communicate

   const FhgfsOpsErr requestRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   if (unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // communication error
      LogContext(logContext).log(Log_WARNING,
         "Communication with storage target failed. " +
         std::string(isBuddyMirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) + "; Session: " + sessionID.str() +
         "; FileHandle: " + fileHandleID);

      return requestRes;
   }

   // correct response type received

   auto* closeRespMsg = static_cast<CloseChunkFileRespMsg*>(rrArgs.outRespMsg.get() );
   const FhgfsOpsErr closeRemoteRes = closeRespMsg->getResult();

   // set current dynamic attribs (even if result not success, because then storageVersion==0)
   if (outDynAttribs)
   {
      *outDynAttribs = DynamicFileAttribs(closeRespMsg->getStorageVersion(),
         closeRespMsg->getFileSize(), closeRespMsg->getAllocedBlocks(),
         closeRespMsg->getModificationTimeSecs(), closeRespMsg->getLastAccessTimeSecs() );
   }

   if (closeRemoteRes == FhgfsOpsErr_SUCCESS)
   { // success: chunk file closed
      LOG_DEBUG(logContext, Log_DEBUG,
         "Closed chunk file on target. " +
         std::string(isBuddyMirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) + "; Session: " + sessionID.str() +
         "; FileHandle: " + fileHandleID);

      return FhgfsOpsErr_SUCCESS;
   }

   // error: chunk file not closed

   // "in use" happens on ctrl+c, so don't irritate user with these log msgs
   const int logLevel = (closeRemoteRes == FhgfsOpsErr_INUSE) ? Log_DEBUG : Log_WARNING;

   LogContext(logContext).log(logLevel,
      "Closing chunk file on target failed. " +
      std::string(isBuddyMirrored ? "Mirror " : "") +
      "TargetID: " + StringTk::uintToStr(targetID) +
      "; Error: " + boost::lexical_cast<std::string>(closeRemoteRes) +
      "; Session: " + sessionID.str() +
      "; FileHandle: " + std::string(fileHandleID) );

   return closeRemoteRes;
}

View File

@@ -0,0 +1,69 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/Common.h>
/**
 * Work item that closes one chunk file on a storage target and reports the result through the
 * out-pointers passed to the constructor.
 */
class CloseChunkFileWork : public Work
{
public:
/**
 * All pointer arguments are stored as given and used later by process().
 *
 * @param outDynAttribs may be NULL if caller is not interested
 * @param outResult written by process() with the result of the close request
 * @param counter incremented by process() after outResult was written
 */
CloseChunkFileWork(const NumNodeID sessionID, const std::string& fileHandleID,
StripePattern* pattern, uint16_t targetID, PathInfo* pathInfo,
DynamicFileAttribs *outDynAttribs, FhgfsOpsErr* outResult, SynchronizedCounter* counter) :
sessionID(sessionID), fileHandleID(fileHandleID), pattern(pattern), targetID(targetID),
pathInfoPtr(pathInfo), outDynAttribs(outDynAttribs), outResult(outResult),
counter(counter), useBuddyMirrorSecond(false),
msgUserID(NETMSG_DEFAULT_USERID)
{
// all assignments done in initializer list
}
virtual ~CloseChunkFileWork() {}
// the buffer arguments are not used by the implementation
virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);
private:
NumNodeID sessionID;
std::string fileHandleID;
StripePattern* pattern; // only its pattern type is read (buddy mirror or not)
uint16_t targetID;
PathInfo* pathInfoPtr; // to find chunk files
DynamicFileAttribs* outDynAttribs; // may be NULL
FhgfsOpsErr* outResult;
SynchronizedCounter* counter;
bool useBuddyMirrorSecond; // false unless setUseBuddyMirrorSecond() was called
unsigned msgUserID; // set in the message header of the request
// sends the request and evaluates the response
FhgfsOpsErr communicate();
public:
// getters & setters
void setMsgUserID(unsigned msgUserID)
{
this->msgUserID = msgUserID;
}
// one-way switch: there is no way to reset the flag
void setUseBuddyMirrorSecond()
{
this->useBuddyMirrorSecond = true;
}
};

View File

@@ -0,0 +1,90 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/storage/attribs/GetChunkFileAttribsMsg.h>
#include <common/net/message/storage/attribs/GetChunkFileAttribsRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <program/Program.h>
#include "GetChunkFileAttribsWork.h"
void GetChunkFileAttribsWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Requests the dynamic attributes of one chunk file from its storage target.
 *
 * outDynAttribs is written only on success.
 *
 * @return the communication error, the error reported by the target, or FhgfsOpsErr_SUCCESS
 */
FhgfsOpsErr GetChunkFileAttribsWork::communicate()
{
   const char* logContext = "Stat chunk file work";

   App* app = Program::getApp();

   const bool isBuddyMirrored = (pattern->getPatternType() == StripePatternType_BuddyMirror);

   // prepare request message

   GetChunkFileAttribsMsg getSizeMsg(entryID, targetID, pathInfo);

   if (isBuddyMirrored)
   {
      getSizeMsg.addMsgHeaderFeatureFlag(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR);

      if (useBuddyMirrorSecond)
         getSizeMsg.addMsgHeaderFeatureFlag(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR_SECOND);
   }

   getSizeMsg.setMsgHeaderUserID(msgUserID);

   // prepare communication

   RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );
   rrTarget.setTargetStates(app->getTargetStateStore() );

   if (isBuddyMirrored)
      rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);

   RequestResponseArgs rrArgs(nullptr, &getSizeMsg, NETMSGTYPE_GetChunkFileAttribsResp);

   // communicate

   const FhgfsOpsErr requestRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   if (unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // communication error
      LogContext(logContext).log(Log_WARNING,
         "Communication with storage target failed. " +
         std::string(isBuddyMirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) + "; EntryID: " + entryID);

      return requestRes;
   }

   // correct response type received

   auto* getSizeRespMsg = static_cast<GetChunkFileAttribsRespMsg*>(rrArgs.outRespMsg.get() );
   const FhgfsOpsErr getSizeResult = getSizeRespMsg->getResult();

   if (getSizeResult != FhgfsOpsErr_SUCCESS)
   { // error: target could not provide the chunk file attributes
      LogContext(logContext).log(Log_WARNING,
         "Getting chunk file attributes from target failed. " +
         std::string(isBuddyMirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) + "; EntryID: " + entryID);

      return getSizeResult;
   }

   // success: chunk file dynamic attribs refreshed

   *outDynAttribs = DynamicFileAttribs(getSizeRespMsg->getStorageVersion(),
      getSizeRespMsg->getSize(), getSizeRespMsg->getAllocedBlocks(),
      getSizeRespMsg->getModificationTimeSecs(), getSizeRespMsg->getLastAccessTimeSecs() );

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,64 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/PathInfo.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/Common.h>
/**
 * Work item that fetches the dynamic attributes of one chunk file from a storage target and
 * reports the result through the out-pointers passed to the constructor.
 */
class GetChunkFileAttribsWork : public Work
{
public:
/**
 * All pointer arguments are stored as given and used later by process().
 *
 * @param pathInfo: Only as reference pointer, not owned by this object
 * @param outDynAttribs written on success; dereferenced without a NULL check
 * @param outResult written by process() with the result of the request
 * @param counter incremented by process() after outResult was written
 */
GetChunkFileAttribsWork(const std::string& entryID, StripePattern* pattern, uint16_t targetID,
PathInfo* pathInfo, DynamicFileAttribs *outDynAttribs, FhgfsOpsErr* outResult,
SynchronizedCounter* counter) : entryID(entryID), pattern(pattern), targetID(targetID),
pathInfo(pathInfo), outDynAttribs(outDynAttribs), outResult(outResult), counter(counter),
useBuddyMirrorSecond(false), msgUserID(NETMSG_DEFAULT_USERID)
{
// all assignments done in initializer list
}
virtual ~GetChunkFileAttribsWork()
{
}
// the buffer arguments are not used by the implementation
virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);
private:
std::string entryID;
StripePattern* pattern; // only its pattern type is read (buddy mirror or not)
uint16_t targetID;
PathInfo *pathInfo; // only as reference ptr, not owned by this object!
DynamicFileAttribs* outDynAttribs;
FhgfsOpsErr* outResult;
SynchronizedCounter* counter;
bool useBuddyMirrorSecond; // false unless setUseBuddyMirrorSecond() was called
unsigned msgUserID; // only used for msg header info
// sends the request and evaluates the response
FhgfsOpsErr communicate();
public:
void setMsgUserID(unsigned msgUserID)
{
this->msgUserID = msgUserID;
}
// one-way switch: there is no way to reset the flag
void setUseBuddyMirrorSecond()
{
this->useBuddyMirrorSecond = true;
}
};

View File

@@ -0,0 +1,218 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/net/message/control/AckMsg.h>
#include <common/net/message/session/locking/LockGrantedMsg.h>
#include <common/net/message/NetMessage.h>
#include <program/Program.h>
#include "LockEntryNotificationWork.h"
#include <mutex>
// Definition of the static mutex that incAckCounter() locks while it touches ackCounter.
Mutex LockEntryNotificationWork::ackCounterMutex;
// Definition of the static counter (shared by all instances); process() puts its value into
// the ackID prefix.
unsigned LockEntryNotificationWork::ackCounter = 0;
void LockEntryNotificationWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen)
{
/* note: this code is very similar to LockRangeNotificationWork, so if you change something here,
you probably want to change it there, too. */
const char* logContext = "LockEntryNotificationWork::process";
App* app = Program::getApp();
Logger* logger = Logger::getLogger();
Config* cfg = app->getConfig();
AcknowledgmentStore* ackStore = app->getAckStore();
DatagramListener* dgramLis = app->getDatagramListener();
MetaStore* metaStore = app->getMetaStore();
NodeStoreClients* clients = app->getClientNodes();
NumNodeID localNodeID = app->getLocalNode().getNumID();
// max total time is ackWaitMS * numRetries, defaults to 333ms * 15 => 5s
int ackWaitSleepMS = cfg->getTuneLockGrantWaitMS();
int numRetriesLeft = cfg->getTuneLockGrantNumRetries();
WaitAckMap waitAcks;
WaitAckMap receivedAcks;
WaitAckNotification notifier;
bool allAcksReceived = false;
// note: we use uint for tv_sec (not uint64) because 32 bits are enough here
// gives string like this: "time-counter-elck-"
std::string ackIDPrefix =
StringTk::uintToHexStr(TimeAbs().getTimeval()->tv_sec) + "-" +
StringTk::uintToHexStr(incAckCounter() ) + "-" "elck" "-";
if (notifyList.empty())
return; // nothing to be done
// create and register waitAcks
/* note: waitAcks store pointers to notifyList items, so make sure to not remove anything from
the list while we're still using the waitAcks pointers */
for (LockEntryNotifyListIter iter = notifyList.begin(); iter != notifyList.end(); iter++)
{
std::string ackID = ackIDPrefix + iter->lockAckID; // (we assume lockAckID is globally unique)
WaitAck waitAck(ackID, &(*iter) );
waitAcks.insert(WaitAckMapVal(ackID, waitAck) );
}
ackStore->registerWaitAcks(&waitAcks, &receivedAcks, &notifier);
// loop: send requests -> waitforcompletion -> resend
while(numRetriesLeft && !app->getSelfTerminate() )
{
// create waitAcks copy
WaitAckMap currentWaitAcks;
{
const std::lock_guard<Mutex> lock (notifier.waitAcksMutex);
currentWaitAcks = waitAcks;
}
// send messages
for(WaitAckMapIter iter = currentWaitAcks.begin(); iter != currentWaitAcks.end(); iter++)
{
EntryLockDetails* lockDetails = (EntryLockDetails*)iter->second.privateData;
LockGrantedMsg msg(lockDetails->lockAckID, iter->first, localNodeID);
std::pair<bool, unsigned> serializeRes = msg.serializeMessage(bufOut, bufOutLen);
if(unlikely(!serializeRes.first) )
{ // buffer too small - should never happen
logger->log(Log_CRITICAL, logContext, "BUG(?): Buffer too small for message "
"serialization: " + StringTk::intToStr(bufOutLen) + "/" +
StringTk::intToStr(serializeRes.second) );
continue;
}
auto node = clients->referenceNode(lockDetails->clientNumID);
if(unlikely(!node) )
{ // node not exists
logger->log(Log_DEBUG, logContext, "Cannot grant lock to unknown client: " +
lockDetails->clientNumID.str());
continue;
}
dgramLis->sendBufToNode(*node, bufOut, serializeRes.second);
}
// wait for acks
allAcksReceived = ackStore->waitForAckCompletion(&currentWaitAcks, &notifier, ackWaitSleepMS);
if(allAcksReceived)
break; // all acks received
// some waitAcks left => prepare next loop
numRetriesLeft--;
}
// waiting for acks is over
ackStore->unregisterWaitAcks(&waitAcks);
// check and handle results (waitAcks now contains all unreceived acks)
if (waitAcks.empty())
{
LOG_DBG(GENERAL, DEBUG, "Stats: received all acks.", receivedAcks.size(), notifyList.size());
return; // perfect, all acks received
}
// some acks were missing...
logger->log(Log_DEBUG, logContext, "Some replies to lock grants missing. Received: " +
StringTk::intToStr(receivedAcks.size() ) + "/" +
StringTk::intToStr(receivedAcks.size() + waitAcks.size() ) );
// the inode is supposed to be be referenced already
MetaFileHandle inode = metaStore->referenceLoadedFile(this->parentEntryID, this->isBuddyMirrored,
this->entryID);
if(unlikely(!inode) )
{ // locked inode cannot be referenced
logger->log(Log_DEBUG, logContext, "FileID cannot be referenced (file unlinked?): " +
this->entryID);
return;
}
// unlock all locks for which we didn't receive an ack
for(WaitAckMapIter iter = waitAcks.begin(); iter != waitAcks.end(); iter++)
{
EntryLockDetails* lockDetails = (EntryLockDetails*)iter->second.privateData;
unlockWaiter(*inode, lockDetails);
LOG_DEBUG(logContext, Log_DEBUG, "Reply was missing from: " + lockDetails->clientNumID.str());
}
// cancel all remaining lock waiters if too many acks were missing
// (this is very important to avoid long timeouts if multiple clients are gone/disconnected)
if(waitAcks.size() > 1)
{ // cancel all waiters
cancelAllWaiters(*inode);
}
// cleanup
metaStore->releaseFile(this->parentEntryID, inode);
}
/**
 * Release the lock of a waiter whose ack never arrived.
 *
 * The details are switched to "unlock" and then applied to the inode through the call that
 * matches this work's lockType; an unknown lockType is only logged.
 */
void LockEntryNotificationWork::unlockWaiter(FileInode& inode, EntryLockDetails* lockDetails)
{
   lockDetails->setUnlock();

   switch (lockType)
   {
      case LockEntryNotifyType_APPEND:
         inode.flockAppend(*lockDetails);
         break;

      case LockEntryNotifyType_FLOCK:
         inode.flockEntry(*lockDetails);
         break;

      default:
         LOG(GENERAL, ERR, "Invalid lockType given.", lockType);
         break;
   }
}
/**
 * Cancel every remaining lock waiter of the given inode.
 *
 * Usually called because too many acks were not received and we want to avoid repeated long
 * timeout stalls. Which waiter queue is cancelled depends on this work's lockType; an unknown
 * lockType is only logged.
 */
void LockEntryNotificationWork::cancelAllWaiters(FileInode& inode)
{
   switch (lockType)
   {
      case LockEntryNotifyType_APPEND:
         inode.flockAppendCancelAllWaiters();
         break;

      case LockEntryNotifyType_FLOCK:
         inode.flockEntryCancelAllWaiters();
         break;

      default:
         LOG(GENERAL, ERR, "Invalid lockType given.", lockType);
         break;
   }
}
/**
 * Increment the static ack counter while holding ackCounterMutex.
 *
 * @return the counter value from before the increment
 */
unsigned LockEntryNotificationWork::incAckCounter()
{
   const std::lock_guard<Mutex> lock(ackCounterMutex);

   const unsigned previousValue = ackCounter;
   ++ackCounter;

   return previousValue;
}
/**
 * @return the send mutex of the given datagram listener
 */
Mutex* LockEntryNotificationWork::getDGramLisMutex(AbstractDatagramListener* dgramLis)
{
   Mutex* sendMutex = dgramLis->getSendMutex();
   return sendMutex;
}

View File

@@ -0,0 +1,57 @@
#pragma once
#include <common/Common.h>
#include <common/components/worker/Work.h>
#include <common/components/AbstractDatagramListener.h>
#include <common/storage/StorageErrors.h>
#include <storage/Locking.h>
class FileInode; // forward declaration
typedef std::list<EntryLockDetails> LockEntryNotifyList;
typedef LockEntryNotifyList::iterator LockEntryNotifyListIter;
typedef LockEntryNotifyList::const_iterator LockEntryNotifyListCIter;
/**
 * Work item that holds a list of entry lock waiters together with the identity of the file
 * (parent entry ID, entry ID, buddy mirror flag) and the lock type they are waiting for.
 */
class LockEntryNotificationWork : public Work
{
   public:
      /**
       * @param notifyList taken by value and moved into this object; pass it with std::move()
       *    to hand the list over without copying it.
       */
      LockEntryNotificationWork(LockEntryNotifyType lockType, const std::string& parentEntryID,
         const std::string& entryID, bool isBuddyMirrored, LockEntryNotifyList notifyList) :
         lockType(lockType),
         parentEntryID(parentEntryID),
         entryID(entryID),
         isBuddyMirrored(isBuddyMirrored),
         notifyList(std::move(notifyList))
      {
         // nothing to do beyond the initializer list
      }

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

   private:
      // static attributes & methods

      static Mutex ackCounterMutex; // locked by incAckCounter()
      static unsigned ackCounter;

      static unsigned incAckCounter();

      // instance attributes & methods

      LockEntryNotifyType lockType;
      std::string parentEntryID;
      std::string entryID;
      bool isBuddyMirrored;
      LockEntryNotifyList notifyList;

      void unlockWaiter(FileInode& inode, EntryLockDetails* lockDetails);
      void cancelAllWaiters(FileInode& inode);

      Mutex* getDGramLisMutex(AbstractDatagramListener* dgramLis);
};

View File

@@ -0,0 +1,185 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/net/message/control/AckMsg.h>
#include <common/net/message/session/locking/LockGrantedMsg.h>
#include <common/net/message/NetMessage.h>
#include <program/Program.h>
#include "LockRangeNotificationWork.h"
#include <mutex>
// Definition of the static mutex that incAckCounter() locks while it touches ackCounter.
Mutex LockRangeNotificationWork::ackCounterMutex;
// Definition of the static counter (shared by all instances); process() puts its value into
// the ackID prefix.
unsigned LockRangeNotificationWork::ackCounter = 0;
/**
 * Tell every waiter in notifyList that its range lock was granted and collect the acks.
 *
 * A LockGrantedMsg is serialized into bufOut and sent via the datagram listener for each waiter
 * whose ack is still outstanding. Sending is repeated until all acks arrived, the configured
 * number of retries is used up or app->getSelfTerminate() is set. Afterwards, locks whose ack
 * is still missing are unlocked again; if more than one ack is missing, all remaining range
 * lock waiters of the file are cancelled as well.
 *
 * @param bufIn not used by this method
 * @param bufInLen not used by this method
 * @param bufOut buffer used for message serialization
 * @param bufOutLen size of bufOut
 */
void LockRangeNotificationWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
   unsigned bufOutLen)
{
   /* note: this code is very similar to LockEntryNotificationWork, so if you change something
      here, you probably want to change it there, too. */

   // qualified name on purpose: __func__ would only yield "process", which cannot be told apart
   // from other process() methods in the log
   const char* logContext = "LockRangeNotificationWork::process";

   App* app = Program::getApp();
   Logger* logger = Logger::getLogger();
   Config* cfg = app->getConfig();
   AcknowledgmentStore* ackStore = app->getAckStore();
   DatagramListener* dgramLis = app->getDatagramListener();
   MetaStore* metaStore = app->getMetaStore();
   NodeStoreClients* clients = app->getClientNodes();
   NumNodeID localNodeID = app->getLocalNode().getNumID();

   // max total time is ackWaitMS * numRetries, defaults to 333ms * 15 => 5s
   int ackWaitSleepMS = cfg->getTuneLockGrantWaitMS();
   int numRetriesLeft = cfg->getTuneLockGrantNumRetries();

   WaitAckMap waitAcks;
   WaitAckMap receivedAcks;
   WaitAckNotification notifier;

   bool allAcksReceived = false;

   // note: we use uint for tv_sec (not uint64) because 32 bits are enough here
   // gives string like this: "time-counter-rlck-"
   std::string ackIDPrefix =
      StringTk::uintToHexStr(TimeAbs().getTimeval()->tv_sec) + "-" +
      StringTk::uintToHexStr(incAckCounter() ) + "-"
      "rlck" "-";

   if (notifyList.empty())
      return; // nothing to be done

   // create and register waitAcks
   /* note: waitAcks store pointers to notifyList items, so make sure to not remove anything
      from the list while we're still using the waitAcks pointers */
   for (LockRangeNotifyListIter iter = notifyList.begin(); iter != notifyList.end(); iter++)
   {
      std::string ackID = ackIDPrefix + iter->lockAckID; // (we assume lockAckID is globally unique)

      WaitAck waitAck(ackID, &(*iter) );
      waitAcks.insert(WaitAckMapVal(ackID, waitAck) );
   }

   ackStore->registerWaitAcks(&waitAcks, &receivedAcks, &notifier);

   // loop: send requests -> waitforcompletion -> resend
   while(numRetriesLeft && !app->getSelfTerminate() )
   {
      // create waitAcks copy (taken while holding the notifier's mutex)
      WaitAckMap currentWaitAcks;
      {
         const std::lock_guard<Mutex> lock(notifier.waitAcksMutex);
         currentWaitAcks = waitAcks;
      }

      // send messages
      for(WaitAckMapIter iter = currentWaitAcks.begin(); iter != currentWaitAcks.end(); iter++)
      {
         RangeLockDetails* lockDetails = (RangeLockDetails*)iter->second.privateData;

         LockGrantedMsg msg(lockDetails->lockAckID, iter->first, localNodeID);

         std::pair<bool, unsigned> serializeRes = msg.serializeMessage(bufOut, bufOutLen);
         if(unlikely(!serializeRes.first) )
         { // buffer too small - should never happen
            logger->log(Log_CRITICAL, logContext, "BUG(?): Buffer too small for message "
               "serialization: " + StringTk::intToStr(bufOutLen) + "/" +
               StringTk::intToStr(serializeRes.second) );
            continue;
         }

         auto node = clients->referenceNode(lockDetails->clientNumID);
         if(unlikely(!node) )
         { // node not exists
            logger->log(Log_DEBUG, logContext, "Cannot grant lock to unknown client: " +
               lockDetails->clientNumID.str());
            continue;
         }

         dgramLis->sendBufToNode(*node, bufOut, serializeRes.second);
      }

      // wait for acks
      allAcksReceived = ackStore->waitForAckCompletion(&currentWaitAcks, &notifier,
         ackWaitSleepMS);
      if(allAcksReceived)
         break; // all acks received

      // some waitAcks left => prepare next loop
      numRetriesLeft--;
   }

   // waiting for acks is over
   ackStore->unregisterWaitAcks(&waitAcks);

   // check and handle results (waitAcks now contains all unreceived acks)
   if (waitAcks.empty())
   {
      LOG_DBG(GENERAL, DEBUG, "Stats: received all acks.", receivedAcks.size(), notifyList.size());
      return; // perfect, all acks received
   }

   // some acks were missing...
   logger->log(Log_DEBUG, logContext, "Some replies to lock grants missing. Received: " +
      StringTk::intToStr(receivedAcks.size() ) + "/" +
      StringTk::intToStr(receivedAcks.size() + waitAcks.size() ) );

   // the inode is supposed to be referenced already
   MetaFileHandle inode = metaStore->referenceLoadedFile(this->parentEntryID,
      this->isBuddyMirrored, this->entryID);
   if(unlikely(!inode) )
   { // locked inode cannot be referenced
      logger->log(Log_DEBUG, logContext, "FileID cannot be referenced (file unlinked?): "
         + this->entryID);
      return;
   }

   // unlock all locks for which we didn't receive an ack
   for(WaitAckMapIter iter = waitAcks.begin(); iter != waitAcks.end(); iter++)
   {
      RangeLockDetails* lockDetails = (RangeLockDetails*)iter->second.privateData;

      lockDetails->setUnlock();
      inode->flockRange(*lockDetails);

      LOG_DEBUG(logContext, Log_DEBUG, "Reply was missing from: " + lockDetails->clientNumID.str());
   }

   // cancel all remaining lock waiters if too many acks were missing
   // (this is very important to avoid long timeouts if multiple clients are gone/disconnected)
   if(waitAcks.size() > 1)
   { // cancel all waiters
      inode->flockRangeCancelAllWaiters();
   }

   // cleanup
   metaStore->releaseFile(this->parentEntryID, inode);
}
/**
 * Increment the static ack counter while holding ackCounterMutex.
 *
 * @return the counter value from before the increment
 */
unsigned LockRangeNotificationWork::incAckCounter()
{
   const std::lock_guard<Mutex> lock(ackCounterMutex);

   const unsigned previousValue = ackCounter;
   ++ackCounter;

   return previousValue;
}
/**
 * @return the send mutex of the given datagram listener
 */
Mutex* LockRangeNotificationWork::getDGramLisMutex(AbstractDatagramListener* dgramLis)
{
   Mutex* sendMutex = dgramLis->getSendMutex();
   return sendMutex;
}

View File

@@ -0,0 +1,49 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/components/AbstractDatagramListener.h>
#include <common/Common.h>
typedef std::list<RangeLockDetails> LockRangeNotifyList;
typedef LockRangeNotifyList::iterator LockRangeNotifyListIter;
typedef LockRangeNotifyList::const_iterator LockRangeNotifyListCIter;
/**
 * Work item that holds a list of range lock waiters together with the identity of the file
 * (parent entry ID, entry ID, buddy mirror flag) they are waiting for.
 */
class LockRangeNotificationWork : public Work
{
   public:
      /**
       * @param notifyList taken by value and moved into this object; pass it with std::move()
       *    to hand the list over without copying it.
       */
      LockRangeNotificationWork(const std::string& parentEntryID, const std::string& entryID,
         bool isBuddyMirrored, LockRangeNotifyList notifyList) :
         parentEntryID(parentEntryID),
         entryID(entryID),
         isBuddyMirrored(isBuddyMirrored),
         notifyList(std::move(notifyList))
      {
         // nothing to do beyond the initializer list
      }

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

   private:
      // static attributes & methods

      static Mutex ackCounterMutex; // locked by incAckCounter()
      static unsigned ackCounter;

      static unsigned incAckCounter();

      // instance attributes & methods

      std::string parentEntryID;
      std::string entryID;
      bool isBuddyMirrored;
      LockRangeNotifyList notifyList;

      Mutex* getDGramLisMutex(AbstractDatagramListener* dgramLis);
};

View File

@@ -0,0 +1,97 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/storage/attribs/SetLocalAttrMsg.h>
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <components/worker/SetChunkFileAttribsWork.h>
#include <program/Program.h>
void SetChunkFileAttribsWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Send a SetLocalAttrMsg for the chunk file to the target and evaluate the response.
 *
 * On success, the dynamic attribs from the response are copied to outDynamicAttribs if that
 * pointer is set and the response carries the SETLOCALATTRRESPMSG_FLAG_HAS_ATTRS flag.
 *
 * @return the messaging error if the request/response exchange failed, the result code from
 *    the response if that is not FhgfsOpsErr_SUCCESS, otherwise FhgfsOpsErr_SUCCESS.
 */
FhgfsOpsErr SetChunkFileAttribsWork::communicate()
{
   const char* logContext = "Set chunk file attribs work";

   App* app = Program::getApp();

   SetLocalAttrMsg setAttrMsg(entryID, targetID, pathInfo, validAttribs, attribs, enableCreation);

   const bool isBuddyMirrored = (pattern->getPatternType() == StripePatternType_BuddyMirror);

   // builds the target/entry part shared by the log messages below (the label in front of the
   // entry ID is passed in, because its spelling differs between the messages)
   const auto describeTarget = [&](const char* entryLabel)
   {
      return std::string(isBuddyMirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) + "; " +
         entryLabel + entryID;
   };

   if (isBuddyMirrored)
   {
      setAttrMsg.addMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR);

      if (useBuddyMirrorSecond)
         setAttrMsg.addMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND);
   }

   if (quotaChown)
      setAttrMsg.addMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_USE_QUOTA);

   setAttrMsg.setMsgHeaderUserID(msgUserID);

   // prepare communication

   RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );

   rrTarget.setTargetStates(app->getTargetStateStore() );

   if (isBuddyMirrored)
      rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);

   RequestResponseArgs rrArgs(NULL, &setAttrMsg, NETMSGTYPE_SetLocalAttrResp);

   // communicate

   const FhgfsOpsErr requestRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
   if (unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // communication error
      LogContext(logContext).log(Log_WARNING,
         "Communication with storage target failed. " + describeTarget("entryID: ") );

      return requestRes;
   }

   // correct response type received
   const auto setRespMsg = (const SetLocalAttrRespMsg*)rrArgs.outRespMsg.get();

   const FhgfsOpsErr setRespVal = setRespMsg->getResult();
   if (setRespVal != FhgfsOpsErr_SUCCESS)
   { // error occurred
      LogContext(logContext).log(Log_WARNING,
         "Setting chunk file attributes on target failed. " + describeTarget("EntryID: ") );

      return setRespVal;
   }

   // success
   LOG_DEBUG(logContext, Log_DEBUG,
      "Set attribs of chunk file on target. " + describeTarget("EntryID: ") );

   // hand the attribs from the response to the caller, if requested and available
   if (outDynamicAttribs)
   {
      if (setRespMsg->isMsgHeaderFeatureFlagSet(SETLOCALATTRRESPMSG_FLAG_HAS_ATTRS) )
         setRespMsg->getDynamicAttribs(outDynamicAttribs);
   }

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,79 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/storage/striping/StripePattern.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/Common.h>
/**
 * Work item carrying everything needed to set attributes of one chunk file on one target:
 * the entry, the attributes to apply, the stripe pattern, the target ID and the output
 * locations for the result.
 */
class SetChunkFileAttribsWork : public Work
{
   public:
      /**
       * @param pathInfo just a reference, so do not free it as long as you use this object!
       * @param outDynamicAttribs output location for the chunk's dynamic attribs; checked for
       *    NULL before it is used
       */
      SetChunkFileAttribsWork(const std::string& entryID, int validAttribs,
         SettableFileAttribs* attribs, bool enableCreation, StripePattern* pattern,
         uint16_t targetID, PathInfo* pathInfo, DynamicFileAttribs* outDynamicAttribs,
         FhgfsOpsErr* outResult, SynchronizedCounter* counter) :
         entryID(entryID),
         validAttribs(validAttribs),
         attribs(attribs),
         enableCreation(enableCreation),
         pattern(pattern),
         targetID(targetID),
         pathInfo(pathInfo),
         outDynamicAttribs(outDynamicAttribs),
         outResult(outResult),
         counter(counter),
         quotaChown(false),
         useBuddyMirrorSecond(false),
         msgUserID(NETMSG_DEFAULT_USERID)
      {
         // nothing to do beyond the initializer list
      }

      virtual ~SetChunkFileAttribsWork() {}

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

      // getters & setters

      void setQuotaChown(bool quotaChown)
      {
         this->quotaChown = quotaChown;
      }

      void setMsgUserID(unsigned msgUserID)
      {
         this->msgUserID = msgUserID;
      }

      // one-way switch: there is no way to reset the flag afterwards
      void setUseBuddyMirrorSecond()
      {
         this->useBuddyMirrorSecond = true;
      }

   private:
      std::string entryID;
      int validAttribs;
      SettableFileAttribs* attribs;
      bool enableCreation;
      StripePattern* pattern;
      uint16_t targetID;
      PathInfo* pathInfo;
      DynamicFileAttribs* outDynamicAttribs; // will hold the chunks dynamic attribs as stat'ed
                                             // on the storage server
      FhgfsOpsErr* outResult;
      SynchronizedCounter* counter;
      bool quotaChown;
      bool useBuddyMirrorSecond;
      unsigned msgUserID; // only used for msg header info

      FhgfsOpsErr communicate();
};

View File

@@ -0,0 +1,111 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/storage/TruncLocalFileMsg.h>
#include <common/net/message/storage/TruncLocalFileRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <components/worker/TruncChunkFileWork.h>
#include <program/Program.h>
#include <boost/lexical_cast.hpp>
void TruncChunkFileWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Send a TruncLocalFileMsg for the chunk file to the target and evaluate the response.
 *
 * If outDynAttribs is set, it is filled from the response no matter whether the truncation
 * itself succeeded.
 *
 * @return the messaging error if the request/response exchange failed, the result code from
 *    the response if that is not FhgfsOpsErr_SUCCESS (FhgfsOpsErr_TOOBIG is returned without
 *    logging), otherwise FhgfsOpsErr_SUCCESS.
 */
FhgfsOpsErr TruncChunkFileWork::communicate()
{
   const char* logContext = "Trunc chunk file work";

   App* app = Program::getApp();

   TruncLocalFileMsg truncMsg(filesize, entryID, targetID, pathInfo);

   const bool isBuddyMirrored = (pattern->getPatternType() == StripePatternType_BuddyMirror);

   // builds the target/entry part shared by the log messages below
   const auto describeTarget = [&]()
   {
      return std::string(isBuddyMirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) + "; "
         "EntryID: " + entryID;
   };

   if (isBuddyMirrored)
   {
      truncMsg.addMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR);

      if (useBuddyMirrorSecond)
      {
         truncMsg.addMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_NODYNAMICATTRIBS);
         truncMsg.addMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
      }
   }

   if (useQuota)
      truncMsg.setUserdataForQuota(userID, groupID);

   truncMsg.setMsgHeaderUserID(msgUserID);

   // prepare communication

   RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );

   rrTarget.setTargetStates(app->getTargetStateStore() );

   if (isBuddyMirrored)
      rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);

   RequestResponseArgs rrArgs(NULL, &truncMsg, NETMSGTYPE_TruncLocalFileResp);

   // communicate

   const FhgfsOpsErr requestRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
   if (unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // communication error
      LogContext(logContext).log(Log_WARNING,
         "Communication with storage target failed. " + describeTarget() );

      return requestRes;
   }

   // correct response type received
   TruncLocalFileRespMsg* truncRespMsg = (TruncLocalFileRespMsg*)rrArgs.outRespMsg.get();

   const FhgfsOpsErr truncRespVal = truncRespMsg->getResult();

   // set current dynamic attribs (even if result not success, because then storageVersion==0)
   if (outDynAttribs)
   {
      *outDynAttribs = DynamicFileAttribs(truncRespMsg->getStorageVersion(),
         truncRespMsg->getFileSize(), truncRespMsg->getAllocedBlocks(),
         truncRespMsg->getModificationTimeSecs(), truncRespMsg->getLastAccessTimeSecs() );
   }

   if (likely(truncRespVal == FhgfsOpsErr_SUCCESS) )
   { // success: chunk file truncated
      LOG_DEBUG(logContext, Log_DEBUG,
         "Chunk file truncated on target. " + describeTarget() );

      return FhgfsOpsErr_SUCCESS;
   }

   // error: chunk file not truncated

   if (truncRespVal == FhgfsOpsErr_TOOBIG)
      return truncRespVal; // will be passed through to user app on client, so don't log here

   LogContext(logContext).log(Log_WARNING,
      "Truncation of chunk file on target failed. " + describeTarget() + "; "
      "Error: " + boost::lexical_cast<std::string>(truncRespVal) );

   return truncRespVal;
}

View File

@@ -0,0 +1,78 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/Common.h>
/**
 * Truncate file on storage servers
 *
 * Carries the entry, the (target-local) file size, the stripe pattern, the target ID and the
 * output locations for the result of one truncation request.
 */
class TruncChunkFileWork : public Work
{
   public:
      /**
       * @param filesize already converted to the storage node's local file size
       * @param pathInfo only stored as a pointer; not owned by this object
       * @param outDynAttribs may be NULL if caller is not interested
       */
      TruncChunkFileWork(const std::string& entryID, int64_t filesize, StripePattern* pattern,
         uint16_t targetID, PathInfo* pathInfo, DynamicFileAttribs *outDynAttribs,
         FhgfsOpsErr* outResult, SynchronizedCounter* counter) :
         entryID(entryID), filesize(filesize), pattern(pattern), targetID(targetID),
         pathInfo(pathInfo), outDynAttribs(outDynAttribs), outResult(outResult), counter(counter),
         userID(0), groupID(0), // defined values until setUserdataForQuota() provides real ones
         useQuota(false), useBuddyMirrorSecond(false), msgUserID(NETMSG_DEFAULT_USERID)
      {
         // all assignments done in initializer list
      }

      virtual ~TruncChunkFileWork() {}

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

   private:
      std::string entryID;
      int64_t filesize; // already converted to storage node's local file size
      StripePattern* pattern;
      uint16_t targetID;
      PathInfo* pathInfo; // note: not owned by this object
      DynamicFileAttribs* outDynAttribs;
      FhgfsOpsErr* outResult;
      SynchronizedCounter* counter;

      unsigned userID; // only passed on to the message if useQuota is set
      unsigned groupID; // only passed on to the message if useQuota is set
      bool useQuota;

      bool useBuddyMirrorSecond;

      unsigned msgUserID; // only used for msg header info

      FhgfsOpsErr communicate();

   public:
      // getters & setters

      /**
       * Enables quota handling for the request and stores the IDs to be sent along with it.
       */
      void setUserdataForQuota(unsigned userID, unsigned groupID)
      {
         this->useQuota = true;
         this->userID = userID;
         this->groupID = groupID;
      }

      void setMsgUserID(unsigned msgUserID)
      {
         this->msgUserID = msgUserID;
      }

      // one-way switch: there is no way to reset the flag afterwards
      void setUseBuddyMirrorSecond()
      {
         this->useBuddyMirrorSecond = true;
      }
};

View File

@@ -0,0 +1,87 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/storage/creating/UnlinkLocalFileMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <components/worker/UnlinkChunkFileWork.h>
#include <program/Program.h>
void UnlinkChunkFileWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Send an UnlinkLocalFileMsg for the chunk file to the target and evaluate the response.
 *
 * @return the messaging error if the request/response exchange failed, the result code from
 *    the response if that is not FhgfsOpsErr_SUCCESS, otherwise FhgfsOpsErr_SUCCESS.
 */
FhgfsOpsErr UnlinkChunkFileWork::communicate()
{
   const char* logContext = "Unlink chunk file work";

   App* app = Program::getApp();

   UnlinkLocalFileMsg unlinkMsg(entryID, targetID, pathInfo);

   const bool isBuddyMirrored = (pattern->getPatternType() == StripePatternType_BuddyMirror);

   // builds the target/entry part shared by the log messages below
   const auto describeTarget = [&]()
   {
      return std::string(isBuddyMirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) + "; "
         "EntryID: " + entryID;
   };

   if (isBuddyMirrored)
   {
      unlinkMsg.addMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR);

      if (useBuddyMirrorSecond)
         unlinkMsg.addMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
   }

   unlinkMsg.setMsgHeaderUserID(msgUserID);

   // prepare communication

   RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );

   rrTarget.setTargetStates(app->getTargetStateStore() );

   if (isBuddyMirrored)
      rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);

   RequestResponseArgs rrArgs(NULL, &unlinkMsg, NETMSGTYPE_UnlinkLocalFileResp);

   // communicate

   const FhgfsOpsErr requestRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
   if (unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // communication error
      LogContext(logContext).log(Log_WARNING,
         "Communication with storage target failed. " + describeTarget() );

      return requestRes;
   }

   // correct response type received
   UnlinkLocalFileRespMsg* unlinkRespMsg = (UnlinkLocalFileRespMsg*)rrArgs.outRespMsg.get();

   const FhgfsOpsErr unlinkResult = unlinkRespMsg->getResult();
   if (unlinkResult != FhgfsOpsErr_SUCCESS)
   { // error: local file not unlinked
      LogContext(logContext).log(Log_WARNING,
         "Unlinking of chunk file from target failed. " + describeTarget() );

      return unlinkResult;
   }

   // success: chunk file unlinked
   LOG_DEBUG(logContext, Log_DEBUG,
      "Chunk file unlinked from target. " + describeTarget() );

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,62 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/PathInfo.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/Common.h>
/**
 * Work item carrying everything needed to unlink one chunk file on one target: the entry, its
 * stripe pattern, the target ID and the output location for the result.
 */
class UnlinkChunkFileWork : public Work
{
   public:
      /**
       * @param pathInfo just a reference, so do not free it as long as you use this object!
       */
      UnlinkChunkFileWork(const std::string& entryID, StripePattern* pattern, uint16_t targetID,
         PathInfo* pathInfo, FhgfsOpsErr* outResult, SynchronizedCounter* counter) :
         entryID(entryID),
         pattern(pattern),
         targetID(targetID),
         pathInfo(pathInfo),
         outResult(outResult),
         counter(counter),
         useBuddyMirrorSecond(false),
         msgUserID(NETMSG_DEFAULT_USERID)
      {
         // nothing to do beyond the initializer list
      }

      virtual ~UnlinkChunkFileWork() {}

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

      // getters & setters

      void setMsgUserID(unsigned msgUserID)
      {
         this->msgUserID = msgUserID;
      }

      // one-way switch: there is no way to reset the flag afterwards
      void setUseBuddyMirrorSecond()
      {
         this->useBuddyMirrorSecond = true;
      }

   private:
      std::string entryID;
      StripePattern* pattern;
      uint16_t targetID;
      PathInfo* pathInfo; // borrowed pointer, see constructor
      FhgfsOpsErr* outResult;
      SynchronizedCounter* counter;
      bool useBuddyMirrorSecond;
      unsigned msgUserID; // passed to the message header in communicate()

      FhgfsOpsErr communicate();
};

View File

@@ -0,0 +1,445 @@
#pragma once
#include <app/App.h>
#include <common/app/log/Logger.h>
#include <common/components/streamlistenerv2/IncomingPreprocessedMsgWork.h>
#include <common/net/message/session/AckNotifyMsg.h>
#include <common/net/message/session/AckNotifyRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/DebugVariable.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include <session/MirrorMessageResponseState.h>
#include <toolkit/BuddyCommTk.h>
template<typename BaseT, typename LockStateT>
class MirroredMessage : public BaseT
{
protected:
typedef MirroredMessage BaseType;
BuddyResyncJob* resyncJob;
LockStateT lockState;
MirroredMessage():
resyncJob(nullptr)
{}
virtual FhgfsOpsErr processSecondaryResponse(NetMessage& resp) = 0;
virtual const char* mirrorLogContext() const = 0;
virtual std::unique_ptr<MirroredMessageResponseState> executeLocally(
NetMessage::ResponseContext& ctx, bool isSecondary) = 0;
virtual bool isMirrored() = 0;
// IMPORTANT NOTE ON LOCKING ORDER:
// * always take locks the order
// - HashDirLock
// - DirIDLock
// - ParentNameLock
// - FileIDLock
// * always take locks of each type with the order induced by:
// - HashDirLock: id
// - DirIDLock: (id, forWrite)
// - ParentNameLock: (parentID, name)
// - FileIDLock: id
//
// not doing this may result in deadlocks.
virtual LockStateT lock(EntryLockStore& store) = 0;
virtual void forwardToSecondary(NetMessage::ResponseContext& ctx) = 0;
/**
 * Common entry point for messages that may have to be mirrored to the secondary.
 *
 * Steps, in order:
 *  - mirrored message without Flag_BuddyMirrorSecond: pick up the resync job if a resync is in
 *    progress and take the entry locks via lock().
 *  - mirrored message: reference the mirrored session of the requestor.
 *  - mirrored message with a sequence number: answer sequence number 0 with the new seqNoBase and
 *    do no processing; otherwise acquire the mirror state slot for the sequence number.
 *  - slot already known: resend the stored response, or answer TRYAGAIN if there is none yet.
 *  - otherwise: register a sync changeset if a resync job is running, run executeLocally() and
 *    finish the operation unless the message has already completed early.
 *
 * The session reference (if any) is released before returning, and a sync changeset that is still
 * registered for this thread when the method is left is abandoned.
 *
 * @param ctx response context of the incoming request
 * @return always true
 */
virtual bool processIncoming(NetMessage::ResponseContext& ctx)
{
   Session* session = nullptr;
   bool isNewState = true;

   if (isMirrored() && !this->hasFlag(NetMessageHeader::Flag_BuddyMirrorSecond))
   {
      if (Program::getApp()->getInternodeSyncer()->getResyncInProgress())
         resyncJob = Program::getApp()->getBuddyResyncer()->getResyncJob();

      lockState = lock(*Program::getApp()->getMirroredSessions()->getEntryLockStore());
   }

   // make sure that the thread change set is *always* cleared when we leave this method.
   // (the type name deliberately avoids a leading underscore followed by an uppercase letter,
   // since such identifiers are reserved for the implementation.)
   struct ChangeSetGuard {
      ~ChangeSetGuard()
      {
         if (BuddyResyncer::getSyncChangeset())
         {
            LOG(MIRRORING, WARNING, "Abandoning sync changeset");
            BuddyResyncer::abandonSyncChangeset();
         }
      }
   } changeSetGuard;
   (void) changeSetGuard;

   mirrorState.reset();

   if (isMirrored())
   {
      const auto nodeID = this->getRequestorID(ctx).second;
      session = Program::getApp()->getMirroredSessions()->referenceSession(nodeID, true);
   }

   if (isMirrored() && this->hasFlag(NetMessageHeader::Flag_HasSequenceNumber))
   {
      // special case: client has not been told where to start its sequence. in this case,
      // we want to answer with only the new seqNoBase for the client, and do NO processing.
      if (this->getSequenceNumber() == 0)
      {
         GenericResponseMsg response(GenericRespMsgCode_NEWSEQNOBASE, "New seqNoBase");

         response.addFlag(NetMessageHeader::Flag_HasSequenceNumber);
         response.setSequenceNumber(session->getSeqNoBase());
         ctx.sendResponse(response);
         goto exit;
      }

      // a note on locking of mirrorState. since clients process each request in only one
      // thread, per client we can have only one request for a given sequence number at any
      // given time. retries may reuse the same sequence number, and they may be processed in
      // a different thread on the server, but no two threads process the same sequence number
      // from the same client at the same time. thus, no locking for the actual structure is
      // needed, but extra memory barriers to ensure propagation of results between threads
      // are necessary.
      __sync_synchronize();

      if (this->hasFlag(NetMessageHeader::Flag_IsSelectiveAck))
         std::tie(mirrorState, isNewState) = session->acquireMirrorStateSlotSelective(
               this->getSequenceNumberDone(),
               this->getSequenceNumber());
      else
         std::tie(mirrorState, isNewState) = session->acquireMirrorStateSlot(
               this->getSequenceNumberDone(),
               this->getSequenceNumber());
   }

   if (!isNewState)
   {
      // the sequence number has been seen before: replay the stored response if there is one.
      if (mirrorState->response)
         mirrorState->response->sendResponse(ctx);
      else
         ctx.sendResponse(
               GenericResponseMsg(
                  GenericRespMsgCode_TRYAGAIN,
                  "Request for same sequence number is currently in progress"));
   }
   else
   {
      if (resyncJob && resyncJob->isRunning())
      {
         BuddyResyncer::registerSyncChangeset();
         resyncJob->registerOps();
      }

      auto responseState = executeLocally(ctx,
         isMirrored() && this->hasFlag(NetMessageHeader::Flag_BuddyMirrorSecond));

      // responseState may be null if the message has called earlyComplete(). do not finish
      // the operation twice in this case.
      if (responseState)
         finishOperation(ctx, std::move(responseState));
   }

exit:
   if (session)
      Program::getApp()->getMirroredSessions()->releaseSession(session);

   return true;
}
/**
 * Finishes the operation with the given response state before executeLocally() has returned and
 * hands the socket of the request back via IncomingPreprocessedMsgWork::releaseSocket().
 *
 * @param ctx response context of the request being processed
 * @param state response state; it is moved into a heap-allocated ResponseT
 */
template<typename ResponseT>
void earlyComplete(NetMessage::ResponseContext& ctx, ResponseT&& state)
{
   auto response = boost::make_unique<ResponseT>(std::move(state));
   finishOperation(ctx, std::move(response));

   Socket* socket = ctx.getSocket();
   IncomingPreprocessedMsgWork::releaseSocket(Program::getApp(), &socket, this);
}
/**
 * Commits or abandons the sync changeset of the current thread, if one is registered.
 *
 * The changeset is committed only for a mirrored message that is not flagged
 * Flag_BuddyMirrorSecond and for which stateChanged is true; in every other case it is abandoned.
 *
 * @param ctx response context of the request (not used here)
 * @param stateChanged whether the operation changed state
 */
void buddyResyncNotify(NetMessage::ResponseContext& ctx, bool stateChanged)
{
   // pairs with the memory barrier before acquireMirrorStateSlot
   __sync_synchronize();

   if (!BuddyResyncer::getSyncChangeset())
      return;

   const bool commitChanges = isMirrored()
      && !this->hasFlag(NetMessageHeader::Flag_BuddyMirrorSecond)
      && stateChanged;

   if (commitChanges)
      BuddyResyncer::commitThreadChangeSet();
   else
      BuddyResyncer::abandonSyncChangeset();
}
/**
* Completes an operation: informs the secondary, stores the response in the mirror state slot,
* settles the sync changeset of this thread, sends the response and resets lockState.
*
* @param ctx response context of the request being processed
* @param state response state of the operation; may be null, in which case nothing is forwarded
* and no response is sent
*/
void finishOperation(NetMessage::ResponseContext& ctx,
std::unique_ptr<MirroredMessageResponseState> state)
{
// non-owning pointer: ownership of state may be moved into mirrorState below, but the response
// is still needed afterwards.
auto* responsePtr = state.get();
// mirrored message that is not flagged Flag_BuddyMirrorSecond: forward the operation if it
// changed observable state, otherwise only notify the secondary.
if (isMirrored() &&
!this->hasFlag(NetMessageHeader::Flag_BuddyMirrorSecond) &&
state)
{
if (state->changesObservableState())
forwardToSecondary(ctx);
else
notifySecondaryOfACK(ctx);
}
// keep the response in the mirror state slot; processIncoming() resends a stored response when
// the same sequence number is seen again.
if (mirrorState)
mirrorState->response = std::move(state);
// pairs with the memory barrier before acquireMirrorStateSlot
__sync_synchronize();
if (BuddyResyncer::getSyncChangeset())
{
resyncJob = Program::getApp()->getBuddyResyncer()->getResyncJob();
// commit only if this is a mirrored message not flagged Flag_BuddyMirrorSecond whose response
// reports a change of observable state; abandon the changeset otherwise.
if (isMirrored() &&
!this->hasFlag(NetMessageHeader::Flag_BuddyMirrorSecond) &&
responsePtr &&
responsePtr->changesObservableState())
BuddyResyncer::commitThreadChangeSet();
else
BuddyResyncer::abandonSyncChangeset();
// counterpart of the registerOps() call in processIncoming().
// NOTE(review): resyncJob is dereferenced without a null check - presumably a registered
// changeset implies that a job exists; confirm.
resyncJob->unregisterOps();
}
if (responsePtr)
responsePtr->sendResponse(ctx);
// reset the lock state obtained from lock() in processIncoming().
lockState = {};
}
/**
 * Sends an AckNotifiyMsg to the secondary; used by finishOperation() for operations that did not
 * change observable state.
 *
 * @param ctx response context of the request being processed
 */
void notifySecondaryOfACK(NetMessage::ResponseContext& ctx)
{
   // if the secondary does not respond with SUCCESS, it will automatically be set to
   // needs-resync. eventually, resync will clear the secondary sessions entirely, which will
   // also flush the sequence number store.
   AckNotifiyMsg ackNotify;

   sendToSecondary(ctx, ackNotify, NETMSGTYPE_AckNotifyResp);
}
/**
 * Hook called by sendToSecondary() on the request arguments before the message is sent to the
 * secondary. The default implementation does nothing.
 */
virtual void prepareMirrorRequestArgs(RequestResponseArgs& args) {}
/**
 * Sends a message to the secondary of the local buddy group and evaluates the response.
 *
 * Nothing is sent while a resync is in progress. If the secondary is not online/good, the message
 * is only sent if a resync job exists that is either still running or has finished successfully;
 * otherwise the buddy is flagged as needs-resync and the method returns. A communication error, or
 * a response that differs from expectedResult, also flags the buddy as needs-resync.
 *
 * @param ctx response context of the request being processed (source of the requestor ID)
 * @param message message to send; sequence numbers, requestor ID, user ID and mirror flags are set
 *    on it here. Flag_BuddyMirrorSecond is removed again after the request.
 * @param respType type of the response message expected from the secondary
 * @param expectedResult result the secondary is expected to report
 */
template<typename T>
void sendToSecondary(NetMessage::ResponseContext& ctx, MirroredMessageBase<T>& message,
   unsigned respType, FhgfsOpsErr expectedResult = FhgfsOpsErr_SUCCESS)
{
   App* app = Program::getApp();
   NodeStoreServers* metaNodes = app->getMetaNodes();
   MirrorBuddyGroupMapper* buddyGroups = app->getMetaBuddyGroupMapper();

   DEBUG_ENV_VAR(unsigned, FORWARD_DELAY, 0, "BEEGFS_FORWARD_DELAY_SECS");

   if (FORWARD_DELAY)
      sleep(FORWARD_DELAY);

   // if a resync is currently running, abort right here, immediately. we do not need to know
   // the exact state of the buddy: a resync is running. it's bad.
   if (app->getInternodeSyncer()->getResyncInProgress())
      return;

   // check whether the secondary is viable at all: if it is not online and good,
   // communicating will not do any good. even online/needs-resync must be skipped, because
   // the resyncer must be the only entity that changes the secondary as long as it is not
   // good yet.
   {
      CombinedTargetState secondaryState;
      NumNodeID secondaryID(buddyGroups->getSecondaryTargetID(
               buddyGroups->getLocalGroupID()));

      bool getStateRes = app->getMetaStateStore()->getState(secondaryID.val(),
            secondaryState);

      // if the secondary is anything except online/good, set it to needs-resync immediately.
      // whenever we pass this point, the secondary will have missed *something* of
      // importance, so anything except online/good must be set to needs-resync right here.
      if (!getStateRes
            || secondaryState.reachabilityState != TargetReachabilityState_ONLINE
            || secondaryState.consistencyState != TargetConsistencyState_GOOD)
      {
         auto* const resyncer = app->getBuddyResyncer();
         auto* const job = resyncer->getResyncJob();

         // if we have no job or a running job, we must start a resync soon. if we have a
         // job that has finished successfully, the management server may not have noticed
         // that the secondary is completely resynced, so our buddy's state may well not be
         // GOOD even though we have resynced completely. we may assume that a successful
         // resync implies that the buddy is good, even if the management server thinks it
         // isn't.
         if (!job ||
               (!job->isRunning() && job->getState() != BuddyResyncJobState_SUCCESS))
         {
            setBuddyNeedsResync();
            return;
         }
      }
   }

   RequestResponseArgs rrArgs(nullptr, &message, respType);
   RequestResponseNode rrNode(NumNodeID(buddyGroups->getLocalGroupID()), metaNodes);

   rrNode.setMirrorInfo(buddyGroups, true);
   rrNode.setTargetStates(app->getMetaStateStore());

   prepareMirrorRequestArgs(rrArgs);

   // copy sequence numbers and set original requestor info for secondary
   message.setSequenceNumber(this->getSequenceNumber());
   message.setSequenceNumberDone(this->getSequenceNumberDone());
   message.setRequestorID(this->getRequestorID(ctx));

   // (almost) all messages do some sort of statistics gathering by user ID
   message.setMsgHeaderUserID(this->getMsgHeaderUserID());

   // set flag here instead of at the beginning because &message == this is often used
   message.addFlag(NetMessageHeader::Flag_BuddyMirrorSecond);
   message.addFlag(this->getFlags() & NetMessageHeader::Flag_IsSelectiveAck);
   message.addFlag(this->getFlags() & NetMessageHeader::Flag_HasSequenceNumber);

   FhgfsOpsErr commRes = MessagingTk::requestResponseNode(&rrNode, &rrArgs);

   message.removeFlag(NetMessageHeader::Flag_BuddyMirrorSecond);

   if (commRes != FhgfsOpsErr_SUCCESS)
   {
      // since we have reached this point, the secondary has indubitably not received
      // important information from the primary. we now have two choices to keep the system
      // in a consistent, safe state:
      //
      // 1) set the secondary to needs-resync
      // 2) rollback the modifications we have made and let the client retry, hoping that
      //    some future communication with the secondary is successful
      //
      // 2 is not a viable option: since some operations may move data off of this metadata
      // server and onto another one completely; allowing these to be undone requires a
      // two-phase commit protocol, which incurs large communication overhead for a
      // (hopefully) very rare error case. other operations delete local state (eg unlink,
      // or close of an unlinked file), which would have to be held in limbo until either a
      // commit or a rollback is issued.
      //
      // since we assume that communication errors are very rare, option 1 is the most
      // efficient in the general case (as it does not have to keep objects alive past their
      // intended lifetimes), so we set the secondary to needs-resync on any kind of
      // communication error.
      // other errors, e.g. out-of-memory conditions or errors caused by streamout hooks, are
      // also assumed to be rare. if any of these happens, the secondary must be resynced no
      // matter what actually happened. since the operations itself succeeded, we cannot send
      // a notification about the communication error either - we'd have to drop the operation
      // result to do that.
#ifdef BEEGFS_DEBUG
      int buddyNodeID = buddyGroups->getBuddyTargetID(app->getLocalNodeNumID().val());
      LOG_CTX(MIRRORING, DEBUG, mirrorLogContext(), "Communication with secondary failed. "
            "Resync will be required when secondary comes back", buddyNodeID, commRes);
#endif
      setBuddyNeedsResync();
      return;
   }

   FhgfsOpsErr respMsgRes = processSecondaryResponse(*rrArgs.outRespMsg);

   if (respMsgRes != expectedResult)
   {
      // whoops; primary and secondary did different things; if secondary is not resyncing
      // AND communication was good this is concerning (result must have been success on
      // primary, otherwise no forwarding would have happened).
      // usually, this would mean that primary and secondary do not have the same state, or
      // that the secondary has some kind of system error. (if the primary had a system error,
      // it would be more likely to fail than to succeed).
      // in either case, the secondary should be resynced, even if the primary experienced
      // a hardware fault or similar errors: at this point, we can no longer differentiate
      // between good and bad state on the primary, and the secondary may be arbitrarily out
      // of sync.
      LOG_CTX(MIRRORING, NOTICE, mirrorLogContext(),
            "Different return codes from primary and secondary buddy. "
            "Setting secondary to needs-resync.",
            ("Expected response", expectedResult),
            ("Received response", respMsgRes));
      setBuddyNeedsResync();
   }
}
// inodes that are changed during mirrored processing on the secondary (eg file creation or
// deletion, setxattr, etc) may have their timestamps changed to a different value than on the
// primary. to remedy this, the secondary must explicitly set these timestamps during processing.
//
// returns true only for mirrored messages, and then only if the tuneMirrorTimestamps config
// option is enabled.
bool shouldFixTimestamps()
{
   if (!isMirrored())
      return false;

   return Program::getApp()->getConfig()->getTuneMirrorTimestamps();
}
/**
 * Transfers the mirrored timestamps of a directory inode. Does nothing for messages that are not
 * mirrored.
 *
 * Without Flag_BuddyMirrorSecond the timestamps of the inode are read into ts; with the flag set
 * the timestamps in ts are written into the stat data of the inode.
 *
 * @param inode directory inode; must be loaded
 * @param ts output without Flag_BuddyMirrorSecond, input with it
 */
void fixInodeTimestamp(DirInode& inode, MirroredTimestamps& ts)
{
   if (!isMirrored())
      return;

   BEEGFS_BUG_ON_DEBUG(!inode.getIsLoaded(), "inode not loaded");

   StatData statData;
   inode.getStatData(statData);

   if (!this->hasFlag(NetMessageHeader::Flag_BuddyMirrorSecond))
   {
      // no secondary flag: remember the timestamps of the inode
      ts = statData.getMirroredTimestamps();
      return;
   }

   // secondary flag set: take over the timestamps passed in by the caller
   statData.setMirroredTimestamps(ts);
   inode.setStatData(statData);
}
/**
 * Transfers the mirrored timestamps of a file inode. Does nothing for messages that are not
 * mirrored.
 *
 * Without Flag_BuddyMirrorSecond the timestamps of the inode are read into ts; with the flag set
 * the timestamps in ts are written into the stat data of the inode.
 *
 * @param inode file inode
 * @param ts output without Flag_BuddyMirrorSecond, input with it
 * @param saveEntryInfo if not null and Flag_BuddyMirrorSecond is set, the inode is also updated
 *    on disk using this entry info
 */
void fixInodeTimestamp(FileInode& inode, MirroredTimestamps& ts,
   EntryInfo* const saveEntryInfo)
{
   if (!isMirrored())
      return;

   StatData statData;
   inode.getStatData(statData);

   if (!this->hasFlag(NetMessageHeader::Flag_BuddyMirrorSecond))
   {
      // no secondary flag: remember the timestamps of the inode
      ts = statData.getMirroredTimestamps();
      return;
   }

   // secondary flag set: take over the timestamps passed in by the caller
   statData.setMirroredTimestamps(ts);
   inode.setStatData(statData);

   if (saveEntryInfo)
      inode.updateInodeOnDisk(saveEntryInfo);
}
/**
 * Updates the per-node operation statistics for the peer of this request.
 *
 * Mirrored messages flagged Flag_BuddyMirrorSecond are counted as MetaOpCounter_MIRROR; all other
 * messages are counted under the given type.
 *
 * @param ctx response context; its socket provides the peer IP
 * @param type counter to use for messages that are not counted as mirror operations
 */
void updateNodeOp(NetMessage::ResponseContext& ctx, MetaOpCounterTypes type)
{
   const bool handledAsSecondary =
      isMirrored() && this->hasFlag(NetMessageHeader::Flag_BuddyMirrorSecond);
   const auto counter = handledAsSecondary ? MetaOpCounter_MIRROR : type;

   Program::getApp()->getNodeOpStats()->updateNodeOp(
      ctx.getSocket()->getPeerIP(),
      counter,
      this->getMsgHeaderUserID());
}
private:
std::shared_ptr<MirrorStateSlot> mirrorState;
void setBuddyNeedsResync()
{
BuddyCommTk::setBuddyNeedsResync(Program::getApp()->getMetaPath(), true);
}
};

View File

@@ -0,0 +1,354 @@
// control messages
#include <common/net/message/control/AuthenticateChannelMsgEx.h>
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/control/PeerInfoMsgEx.h>
#include <net/message/control/AckMsgEx.h>
#include <net/message/control/SetChannelDirectMsgEx.h>
// nodes messages
#include <common/net/message/nodes/ChangeTargetConsistencyStatesRespMsg.h>
#include <common/net/message/nodes/GetNodeCapacityPoolsRespMsg.h>
#include <common/net/message/nodes/GetNodesRespMsg.h>
#include <common/net/message/nodes/GetTargetMappingsRespMsg.h>
#include <common/net/message/nodes/GetMirrorBuddyGroupsRespMsg.h>
#include <common/net/message/nodes/GetStatesAndBuddyGroupsRespMsg.h>
#include <common/net/message/nodes/GetTargetStatesRespMsg.h>
#include <common/net/message/nodes/RegisterNodeRespMsg.h>
#include <common/net/message/nodes/RemoveNodeRespMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/nodes/storagepools/GetStoragePoolsRespMsg.h>
#include <net/message/nodes/GenericDebugMsgEx.h>
#include <net/message/nodes/GetClientStatsMsgEx.h>
#include <net/message/nodes/GetNodeCapacityPoolsMsgEx.h>
#include <net/message/nodes/GetNodesMsgEx.h>
#include <net/message/nodes/GetTargetMappingsMsgEx.h>
#include <net/message/nodes/HeartbeatMsgEx.h>
#include <net/message/nodes/HeartbeatRequestMsgEx.h>
#include <net/message/nodes/MapTargetsMsgEx.h>
#include <net/message/nodes/PublishCapacitiesMsgEx.h>
#include <net/message/nodes/RefreshCapacityPoolsMsgEx.h>
#include <net/message/nodes/RemoveNodeMsgEx.h>
#include <net/message/nodes/RefreshTargetStatesMsgEx.h>
#include <net/message/nodes/SetMirrorBuddyGroupMsgEx.h>
#include <net/message/nodes/SetTargetConsistencyStatesMsgEx.h>
#include <net/message/nodes/storagepools/RefreshStoragePoolsMsgEx.h>
// storage messages
#include <common/net/message/storage/attribs/RefreshEntryInfoRespMsg.h>
#include <common/net/message/storage/attribs/GetChunkFileAttribsRespMsg.h>
#include <common/net/message/storage/listing/ListDirFromOffsetRespMsg.h>
#include <common/net/message/storage/lookup/FindOwnerRespMsg.h>
#include <common/net/message/storage/lookup/LookupIntentRespMsg.h>
#include <common/net/message/storage/creating/MkDirRespMsg.h>
#include <common/net/message/storage/creating/MkFileRespMsg.h>
#include <common/net/message/storage/creating/MkFileWithPatternRespMsg.h>
#include <common/net/message/storage/creating/MkLocalDirRespMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/creating/RmDirRespMsg.h>
#include <common/net/message/storage/creating/RmLocalDirRespMsg.h>
#include <common/net/message/storage/mirroring/MirrorMetadataRespMsg.h>
#include <common/net/message/storage/mirroring/ResyncRawInodesRespMsg.h>
#include <common/net/message/storage/mirroring/ResyncSessionStoreRespMsg.h>
#include <common/net/message/storage/moving/MovingDirInsertRespMsg.h>
#include <common/net/message/storage/moving/MovingFileInsertRespMsg.h>
#include <common/net/message/storage/moving/RenameRespMsg.h>
#include <common/net/message/storage/quota/RequestExceededQuotaRespMsg.h>
#include <common/net/message/storage/attribs/SetAttrRespMsg.h>
#include <common/net/message/storage/attribs/SetFilePatternRespMsg.h>
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
#include <common/net/message/storage/attribs/StatRespMsg.h>
#include <common/net/message/storage/StatStoragePathRespMsg.h>
#include <common/net/message/storage/TruncFileRespMsg.h>
#include <common/net/message/storage/TruncLocalFileRespMsg.h>
#include <common/net/message/storage/creating/UnlinkFileRespMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
#include <common/net/message/storage/creating/MoveFileInodeMsg.h>
#include <common/net/message/storage/creating/MoveFileInodeRespMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileInodeRespMsg.h>
#include <common/net/message/storage/attribs/GetEntryInfoRespMsg.h>
#include <common/net/message/storage/attribs/RemoveXAttrRespMsg.h>
#include <common/net/message/storage/attribs/SetXAttrRespMsg.h>
#include <common/net/message/storage/creating/HardlinkRespMsg.h>
#include <common/net/message/storage/attribs/UpdateDirParentRespMsg.h>
#include <common/net/message/storage/SetStorageTargetInfoRespMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
#include <net/message/storage/lookup/FindOwnerMsgEx.h>
#include <net/message/storage/listing/ListDirFromOffsetMsgEx.h>
#include <net/message/storage/creating/MkDirMsgEx.h>
#include <net/message/storage/creating/MkFileMsgEx.h>
#include <net/message/storage/creating/MkFileWithPatternMsgEx.h>
#include <net/message/storage/creating/MkLocalDirMsgEx.h>
#include <net/message/storage/creating/RmDirMsgEx.h>
#include <net/message/storage/creating/RmLocalDirMsgEx.h>
#include <net/message/storage/creating/UnlinkLocalFileInodeMsgEx.h>
#include <net/message/storage/creating/RmDirEntryMsgEx.h>
#include <net/message/storage/mirroring/GetMetaResyncStatsMsgEx.h>
#include <net/message/storage/mirroring/ResyncSessionStoreMsgEx.h>
#include <net/message/storage/mirroring/SetMetadataMirroringMsgEx.h>
#include <net/message/storage/mirroring/StorageResyncStartedMsgEx.h>
#include <net/message/storage/moving/MovingDirInsertMsgEx.h>
#include <net/message/storage/moving/MovingFileInsertMsgEx.h>
#include <net/message/storage/moving/RenameV2MsgEx.h>
#include <net/message/storage/quota/SetExceededQuotaMsgEx.h>
#include <net/message/storage/attribs/GetEntryInfoMsgEx.h>
#include <net/message/storage/lookup/LookupIntentMsgEx.h>
#include <net/message/storage/attribs/GetXAttrMsgEx.h>
#include <net/message/storage/attribs/ListXAttrMsgEx.h>
#include <net/message/storage/attribs/RemoveXAttrMsgEx.h>
#include <net/message/storage/attribs/SetAttrMsgEx.h>
#include <net/message/storage/attribs/SetDirPatternMsgEx.h>
#include <net/message/storage/attribs/SetFilePatternMsgEx.h>
#include <net/message/storage/attribs/SetFileStateMsgEx.h>
#include <net/message/storage/attribs/SetXAttrMsgEx.h>
#include <net/message/storage/attribs/StatMsgEx.h>
#include <net/message/storage/attribs/UpdateDirParentMsgEx.h>
#include <net/message/storage/StatStoragePathMsgEx.h>
#include <net/message/storage/TruncFileMsgEx.h>
#include <net/message/storage/creating/UnlinkFileMsgEx.h>
#include <net/message/storage/GetHighResStatsMsgEx.h>
#include <net/message/storage/attribs/RefreshEntryInfoMsgEx.h>
#include <net/message/storage/lookup/FindLinkOwnerMsgEx.h>
#include <net/message/storage/creating/HardlinkMsgEx.h>
#include <net/message/storage/creating/MoveFileInodeMsgEx.h>
#include <net/message/storage/attribs/UpdateDirParentMsgEx.h>
#include <net/message/storage/mirroring/ResyncRawInodesMsgEx.h>
// session messages
#include <common/net/message/session/AckNotifyRespMsg.h>
#include <common/net/message/session/BumpFileVersionRespMsg.h>
#include <common/net/message/session/FSyncLocalFileRespMsg.h>
#include <common/net/message/session/locking/FLockEntryRespMsg.h>
#include <common/net/message/session/locking/FLockRangeRespMsg.h>
#include <common/net/message/session/opening/CloseChunkFileRespMsg.h>
#include <common/net/message/session/opening/CloseFileRespMsg.h>
#include <common/net/message/session/opening/OpenFileRespMsg.h>
#include <common/net/message/session/rw/WriteLocalFileRespMsg.h>
#include <common/net/message/storage/attribs/SetDirPatternRespMsg.h>
#include <net/message/session/AckNotifyMsgEx.h>
#include <net/message/session/BumpFileVersionMsgEx.h>
#include <net/message/session/GetFileVersionMsgEx.h>
#include <net/message/session/locking/FLockAppendMsgEx.h>
#include <net/message/session/locking/FLockEntryMsgEx.h>
#include <net/message/session/locking/FLockRangeMsgEx.h>
#include <net/message/session/opening/CloseFileMsgEx.h>
#include <net/message/session/opening/OpenFileMsgEx.h>
// mon message
#include <net/message/mon/RequestMetaDataMsgEx.h>
// fsck messages
#include <net/message/fsck/CreateDefDirInodesMsgEx.h>
#include <net/message/fsck/CreateEmptyContDirsMsgEx.h>
#include <net/message/fsck/DeleteDirEntriesMsgEx.h>
#include <net/message/fsck/FixInodeOwnersMsgEx.h>
#include <net/message/fsck/FixInodeOwnersInDentryMsgEx.h>
#include <net/message/fsck/LinkToLostAndFoundMsgEx.h>
#include <net/message/fsck/RecreateFsIDsMsgEx.h>
#include <net/message/fsck/RecreateDentriesMsgEx.h>
#include <net/message/fsck/RemoveInodesMsgEx.h>
#include <net/message/fsck/RetrieveDirEntriesMsgEx.h>
#include <net/message/fsck/RetrieveInodesMsgEx.h>
#include <net/message/fsck/RetrieveFsIDsMsgEx.h>
#include <net/message/fsck/FsckSetEventLoggingMsgEx.h>
#include <net/message/fsck/UpdateDirAttribsMsgEx.h>
#include <net/message/fsck/UpdateFileAttribsMsgEx.h>
#include <net/message/fsck/AdjustChunkPermissionsMsgEx.h>
#include <net/message/fsck/CheckAndRepairDupInodeMsgEx.h>
// chunk balancing
#include <common/net/message/storage/chunkbalancing/CpChunkPathsRespMsg.h>
#include <net/message/storage/chunkbalancing/ChunkBalanceMsgEx.h>
#include <net/message/storage/chunkbalancing/StripePatternUpdateMsgEx.h>
#include <common/net/message/SimpleMsg.h>
#include <net/message/nodes/storagepools/RefreshStoragePoolsMsgEx.h>
#include "NetMessageFactory.h"
/**
* Creates an empty message object for the given message type.
*
* @param msgType numeric message type (one of the NETMSGTYPE_* values handled below)
* @return owning pointer to the newly created message; the unique_ptr deletes it, so the caller
* must not delete it manually. for a msgType that is not handled here, a SimpleMsg with type
* NETMSGTYPE_Invalid is returned.
*/
std::unique_ptr<NetMessage> NetMessageFactory::createFromMsgType(unsigned short msgType) const
{
NetMessage* msg;
switch(msgType)
{
// The following lines are grouped by "type of the message" and ordered alphabetically inside
// the groups. There should always be one message per line to keep a clear layout (although
// this might lead to lines that are longer than usual)
// control messages
case NETMSGTYPE_Ack: { msg = new AckMsgEx(); } break;
case NETMSGTYPE_AuthenticateChannel: { msg = new AuthenticateChannelMsgEx(); } break;
case NETMSGTYPE_GenericResponse: { msg = new GenericResponseMsg(); } break;
case NETMSGTYPE_SetChannelDirect: { msg = new SetChannelDirectMsgEx(); } break;
case NETMSGTYPE_PeerInfo: { msg = new PeerInfoMsgEx(); } break;
// nodes messages
case NETMSGTYPE_ChangeTargetConsistencyStatesResp: { msg = new ChangeTargetConsistencyStatesRespMsg(); } break;
case NETMSGTYPE_GenericDebug: { msg = new GenericDebugMsgEx(); } break;
case NETMSGTYPE_GetClientStats: { msg = new GetClientStatsMsgEx(); } break;
case NETMSGTYPE_GetMirrorBuddyGroupsResp: { msg = new GetMirrorBuddyGroupsRespMsg(); } break;
case NETMSGTYPE_GetNodeCapacityPools: { msg = new GetNodeCapacityPoolsMsgEx(); } break;
case NETMSGTYPE_GetNodeCapacityPoolsResp: { msg = new GetNodeCapacityPoolsRespMsg(); } break;
case NETMSGTYPE_GetNodes: { msg = new GetNodesMsgEx(); } break;
case NETMSGTYPE_GetNodesResp: { msg = new GetNodesRespMsg(); } break;
case NETMSGTYPE_GetStatesAndBuddyGroupsResp: { msg = new GetStatesAndBuddyGroupsRespMsg(); } break;
case NETMSGTYPE_GetStoragePoolsResp: { msg = new GetStoragePoolsRespMsg(); } break;
case NETMSGTYPE_GetTargetMappings: { msg = new GetTargetMappingsMsgEx(); } break;
case NETMSGTYPE_GetTargetMappingsResp: { msg = new GetTargetMappingsRespMsg(); } break;
case NETMSGTYPE_GetTargetStatesResp: { msg = new GetTargetStatesRespMsg(); } break;
case NETMSGTYPE_HeartbeatRequest: { msg = new HeartbeatRequestMsgEx(); } break;
case NETMSGTYPE_Heartbeat: { msg = new HeartbeatMsgEx(); } break;
case NETMSGTYPE_MapTargets: { msg = new MapTargetsMsgEx(); } break;
case NETMSGTYPE_PublishCapacities: { msg = new PublishCapacitiesMsgEx(); } break;
case NETMSGTYPE_RefreshStoragePools: { msg = new RefreshStoragePoolsMsgEx(); } break;
case NETMSGTYPE_RegisterNodeResp: { msg = new RegisterNodeRespMsg(); } break;
case NETMSGTYPE_RemoveNode: { msg = new RemoveNodeMsgEx(); } break;
case NETMSGTYPE_RemoveNodeResp: { msg = new RemoveNodeRespMsg(); } break;
case NETMSGTYPE_RefreshCapacityPools: { msg = new RefreshCapacityPoolsMsgEx(); } break;
case NETMSGTYPE_RefreshTargetStates: { msg = new RefreshTargetStatesMsgEx(); } break;
case NETMSGTYPE_SetMirrorBuddyGroup: { msg = new SetMirrorBuddyGroupMsgEx(); } break;
case NETMSGTYPE_SetTargetConsistencyStates: { msg = new SetTargetConsistencyStatesMsgEx(); } break;
case NETMSGTYPE_SetTargetConsistencyStatesResp: { msg = new SetTargetConsistencyStatesRespMsg(); } break;
// storage messages
case NETMSGTYPE_ChunkBalance: { msg = new ChunkBalanceMsgEx(); } break;
case NETMSGTYPE_CpChunkPathsResp: { msg = new CpChunkPathsRespMsg(); } break;
case NETMSGTYPE_FindLinkOwner: { msg = new FindLinkOwnerMsgEx(); } break;
case NETMSGTYPE_FindOwner: { msg = new FindOwnerMsgEx(); } break;
case NETMSGTYPE_FindOwnerResp: { msg = new FindOwnerRespMsg(); } break;
case NETMSGTYPE_GetChunkFileAttribsResp: { msg = new GetChunkFileAttribsRespMsg(); } break;
case NETMSGTYPE_GetEntryInfo: { msg = new GetEntryInfoMsgEx(); } break;
case NETMSGTYPE_GetEntryInfoResp: { msg = new GetEntryInfoRespMsg(); } break;
case NETMSGTYPE_GetHighResStats: { msg = new GetHighResStatsMsgEx(); } break;
case NETMSGTYPE_GetMetaResyncStats: { msg = new GetMetaResyncStatsMsgEx(); } break;
case NETMSGTYPE_RequestExceededQuotaResp: {msg = new RequestExceededQuotaRespMsg(); } break;
case NETMSGTYPE_SetExceededQuota: {msg = new SetExceededQuotaMsgEx(); } break;
case NETMSGTYPE_StorageResyncStarted: { msg = new StorageResyncStartedMsgEx(); } break;
case NETMSGTYPE_StorageResyncStartedResp: { msg = new StorageResyncStartedRespMsg(); } break;
case NETMSGTYPE_GetXAttr: { msg = new GetXAttrMsgEx(); } break;
case NETMSGTYPE_GetXAttrResp: { msg = new GetXAttrRespMsg(); } break;
case NETMSGTYPE_Hardlink: { msg = new HardlinkMsgEx(); } break;
case NETMSGTYPE_HardlinkResp: { msg = new HardlinkRespMsg(); } break;
case NETMSGTYPE_ListDirFromOffset: { msg = new ListDirFromOffsetMsgEx(); } break;
case NETMSGTYPE_ListDirFromOffsetResp: { msg = new ListDirFromOffsetRespMsg(); } break;
case NETMSGTYPE_ListXAttr: { msg = new ListXAttrMsgEx(); } break;
case NETMSGTYPE_ListXAttrResp: { msg = new ListXAttrRespMsg(); } break;
case NETMSGTYPE_LookupIntent: { msg = new LookupIntentMsgEx(); } break;
case NETMSGTYPE_LookupIntentResp: { msg = new LookupIntentRespMsg(); } break;
case NETMSGTYPE_MkDir: { msg = new MkDirMsgEx(); } break;
case NETMSGTYPE_MkDirResp: { msg = new MkDirRespMsg(); } break;
case NETMSGTYPE_MkFile: { msg = new MkFileMsgEx(); } break;
case NETMSGTYPE_MkFileResp: { msg = new MkFileRespMsg(); } break;
case NETMSGTYPE_MkFileWithPattern: { msg = new MkFileWithPatternMsgEx(); } break;
case NETMSGTYPE_MkFileWithPatternResp: { msg = new MkFileWithPatternRespMsg(); } break;
case NETMSGTYPE_MkLocalDir: { msg = new MkLocalDirMsgEx(); } break;
case NETMSGTYPE_MkLocalDirResp: { msg = new MkLocalDirRespMsg(); } break;
case NETMSGTYPE_MovingDirInsert: { msg = new MovingDirInsertMsgEx(); } break;
case NETMSGTYPE_MovingDirInsertResp: { msg = new MovingDirInsertRespMsg(); } break;
case NETMSGTYPE_MovingFileInsert: { msg = new MovingFileInsertMsgEx(); } break;
case NETMSGTYPE_MovingFileInsertResp: { msg = new MovingFileInsertRespMsg(); } break;
case NETMSGTYPE_RefreshEntryInfo: { msg = new RefreshEntryInfoMsgEx(); } break;
case NETMSGTYPE_RefreshEntryInfoResp: { msg = new RefreshEntryInfoRespMsg(); } break;
case NETMSGTYPE_ResyncRawInodes: { msg = new ResyncRawInodesMsgEx(); } break;
case NETMSGTYPE_ResyncRawInodesResp: { msg = new ResyncRawInodesRespMsg(); } break;
case NETMSGTYPE_ResyncSessionStore: { msg = new ResyncSessionStoreMsgEx(); } break;
case NETMSGTYPE_ResyncSessionStoreResp: { msg = new ResyncSessionStoreRespMsg(); } break;
case NETMSGTYPE_RemoveXAttr: { msg = new RemoveXAttrMsgEx(); } break;
case NETMSGTYPE_RemoveXAttrResp: { msg = new RemoveXAttrRespMsg(); } break;
case NETMSGTYPE_Rename: { msg = new RenameV2MsgEx(); } break;
case NETMSGTYPE_RenameResp: { msg = new RenameRespMsg(); } break;
case NETMSGTYPE_RmChunkPathsResp: { msg = new RmChunkPathsRespMsg(); } break;
case NETMSGTYPE_RmDirEntry: { msg = new RmDirEntryMsgEx(); } break;
case NETMSGTYPE_RmDir: { msg = new RmDirMsgEx(); } break;
case NETMSGTYPE_RmDirResp: { msg = new RmDirRespMsg(); } break;
case NETMSGTYPE_RmLocalDir: { msg = new RmLocalDirMsgEx(); } break;
case NETMSGTYPE_RmLocalDirResp: { msg = new RmLocalDirRespMsg(); } break;
case NETMSGTYPE_SetAttr: { msg = new SetAttrMsgEx(); } break;
case NETMSGTYPE_SetAttrResp: { msg = new SetAttrRespMsg(); } break;
case NETMSGTYPE_SetDirPattern: { msg = new SetDirPatternMsgEx(); } break;
case NETMSGTYPE_SetDirPatternResp: { msg = new SetDirPatternRespMsg(); } break;
case NETMSGTYPE_SetLocalAttrResp: { msg = new SetLocalAttrRespMsg(); } break;
case NETMSGTYPE_SetMetadataMirroring: { msg = new SetMetadataMirroringMsgEx(); } break;
case NETMSGTYPE_SetStorageTargetInfoResp: { msg = new SetStorageTargetInfoRespMsg(); } break;
case NETMSGTYPE_SetXAttr: { msg = new SetXAttrMsgEx(); } break;
case NETMSGTYPE_SetXAttrResp: { msg = new SetXAttrRespMsg(); } break;
case NETMSGTYPE_Stat: { msg = new StatMsgEx(); } break;
case NETMSGTYPE_StatResp: { msg = new StatRespMsg(); } break;
case NETMSGTYPE_StatStoragePath: { msg = new StatStoragePathMsgEx(); } break;
case NETMSGTYPE_StatStoragePathResp: { msg = new StatStoragePathRespMsg(); } break;
case NETMSGTYPE_StripePatternUpdate: { msg = new StripePatternUpdateMsgEx(); } break;
case NETMSGTYPE_TruncFile: { msg = new TruncFileMsgEx(); } break;
case NETMSGTYPE_TruncFileResp: { msg = new TruncFileRespMsg(); } break;
case NETMSGTYPE_TruncLocalFileResp: { msg = new TruncLocalFileRespMsg(); } break;
case NETMSGTYPE_UnlinkFile: { msg = new UnlinkFileMsgEx(); } break;
case NETMSGTYPE_UnlinkFileResp: { msg = new UnlinkFileRespMsg(); } break;
case NETMSGTYPE_UnlinkLocalFileResp: { msg = new UnlinkLocalFileRespMsg(); } break;
case NETMSGTYPE_UpdateDirParent: { msg = new UpdateDirParentMsgEx(); } break;
case NETMSGTYPE_UpdateDirParentResp: { msg = new UpdateDirParentRespMsg(); } break;
case NETMSGTYPE_MoveFileInode: { msg = new MoveFileInodeMsgEx(); } break;
case NETMSGTYPE_MoveFileInodeResp: {msg = new MoveFileInodeRespMsg(); } break;
case NETMSGTYPE_UnlinkLocalFileInode: {msg = new UnlinkLocalFileInodeMsgEx(); } break;
case NETMSGTYPE_UnlinkLocalFileInodeResp: {msg = new UnlinkLocalFileInodeRespMsg(); } break;
case NETMSGTYPE_SetFilePattern: { msg = new SetFilePatternMsgEx(); } break;
case NETMSGTYPE_SetFilePatternResp: { msg = new SetFilePatternRespMsg(); } break;
case NETMSGTYPE_SetFileState: { msg = new SetFileStateMsgEx(); } break;
case NETMSGTYPE_SetFileStateResp: { msg = new SetFileStateRespMsg(); } break;
// session messages
case NETMSGTYPE_BumpFileVersion: { msg = new BumpFileVersionMsgEx(); } break;
case NETMSGTYPE_BumpFileVersionResp: { msg = new BumpFileVersionRespMsg(); } break;
case NETMSGTYPE_OpenFile: { msg = new OpenFileMsgEx(); } break;
case NETMSGTYPE_OpenFileResp: { msg = new OpenFileRespMsg(); } break;
case NETMSGTYPE_CloseFile: { msg = new CloseFileMsgEx(); } break;
case NETMSGTYPE_CloseFileResp: { msg = new CloseFileRespMsg(); } break;
case NETMSGTYPE_CloseChunkFileResp: { msg = new CloseChunkFileRespMsg(); } break;
case NETMSGTYPE_WriteLocalFileResp: { msg = new WriteLocalFileRespMsg(); } break;
case NETMSGTYPE_FSyncLocalFileResp: { msg = new FSyncLocalFileRespMsg(); } break;
case NETMSGTYPE_FLockAppend: { msg = new FLockAppendMsgEx(); } break;
case NETMSGTYPE_FLockAppendResp: { msg = new FLockAppendRespMsg(); } break;
case NETMSGTYPE_FLockEntry: { msg = new FLockEntryMsgEx(); } break;
case NETMSGTYPE_FLockEntryResp: { msg = new FLockEntryRespMsg(); } break;
case NETMSGTYPE_FLockRange: { msg = new FLockRangeMsgEx(); } break;
case NETMSGTYPE_FLockRangeResp: { msg = new FLockRangeRespMsg(); } break;
case NETMSGTYPE_GetFileVersion: { msg = new GetFileVersionMsgEx(); } break;
case NETMSGTYPE_GetFileVersionResp: { msg = new GetFileVersionRespMsg(); } break;
case NETMSGTYPE_AckNotify: { msg = new AckNotifiyMsgEx(); } break;
case NETMSGTYPE_AckNotifyResp: { msg = new AckNotifiyRespMsg(); } break;
// mon message
case NETMSGTYPE_RequestMetaData: { msg = new RequestMetaDataMsgEx(); } break;
// fsck messages
case NETMSGTYPE_RetrieveDirEntries: { msg = new RetrieveDirEntriesMsgEx(); } break;
case NETMSGTYPE_RetrieveInodes: { msg = new RetrieveInodesMsgEx(); } break;
case NETMSGTYPE_RetrieveFsIDs: { msg = new RetrieveFsIDsMsgEx(); } break;
case NETMSGTYPE_DeleteDirEntries: { msg = new DeleteDirEntriesMsgEx(); } break;
case NETMSGTYPE_CreateDefDirInodes: { msg = new CreateDefDirInodesMsgEx(); } break;
case NETMSGTYPE_FixInodeOwners: { msg = new FixInodeOwnersMsgEx(); } break;
case NETMSGTYPE_FixInodeOwnersInDentry: { msg = new FixInodeOwnersInDentryMsgEx(); } break;
case NETMSGTYPE_LinkToLostAndFound: { msg = new LinkToLostAndFoundMsgEx(); } break;
case NETMSGTYPE_CreateEmptyContDirs: { msg = new CreateEmptyContDirsMsgEx(); } break;
case NETMSGTYPE_UpdateFileAttribs: { msg = new UpdateFileAttribsMsgEx(); } break;
case NETMSGTYPE_UpdateDirAttribs: { msg = new UpdateDirAttribsMsgEx(); } break;
case NETMSGTYPE_RemoveInodes: { msg = new RemoveInodesMsgEx(); } break;
case NETMSGTYPE_RecreateFsIDs: { msg = new RecreateFsIDsMsgEx(); } break;
case NETMSGTYPE_RecreateDentries: { msg = new RecreateDentriesMsgEx(); } break;
case NETMSGTYPE_FsckSetEventLogging: { msg = new FsckSetEventLoggingMsgEx(); } break;
case NETMSGTYPE_AdjustChunkPermissions: { msg = new AdjustChunkPermissionsMsgEx(); } break;
case NETMSGTYPE_CheckAndRepairDupInode: { msg = new CheckAndRepairDupInodeMsgEx(); } break;
// any other type: return a placeholder message marked as invalid instead of a null pointer
default:
{
msg = new SimpleMsg(NETMSGTYPE_Invalid);
} break;
}
// every switch branch assigns msg, so ownership of a valid object is handed over here
return std::unique_ptr<NetMessage>(msg);
}

View File

@@ -0,0 +1,14 @@
#pragma once
#include <common/Common.h>
#include <common/net/message/AbstractNetMessageFactory.h>
class NetMessageFactory : public AbstractNetMessageFactory
{
public:
NetMessageFactory() {}
protected:
virtual std::unique_ptr<NetMessage> createFromMsgType(unsigned short msgType) const override;
} ;

View File

@@ -0,0 +1,24 @@
#include <program/Program.h>
#include "AckMsgEx.h"
/**
 * Handles an incoming ack: hands the ack value over to the acknowledgment store and counts the
 * operation in the per-node op stats.
 *
 * @param ctx response context; only used to look up the peer IP for the op stats.
 * @return always true.
 */
bool AckMsgEx::processIncoming(ResponseContext& ctx)
{
   #ifdef BEEGFS_DEBUG
      // only referenced by LOG_DEBUG below
      const char* logContext = "Ack incoming";
   #endif //BEEGFS_DEBUG

   LOG_DEBUG(logContext, 5, std::string("Value: ") + getValue() );

   App* const app = Program::getApp();

   app->getAckStore()->receivedAck(getValue() );

   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), MetaOpCounter_ACK,
      getMsgHeaderUserID() );

   return true; // note: this message does not require a response
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <common/net/message/control/AckMsg.h>
// see class AcknowledgeableMsg (fhgfs_common) for a short description
class AckMsgEx : public AckMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,21 @@
#include <program/Program.h>
#include "SetChannelDirectMsgEx.h"
/**
 * Applies the "direct" flag carried by this message to the socket it arrived on and counts the
 * operation in the per-node op stats.
 *
 * @param ctx response context; provides the socket to modify.
 * @return always true.
 */
bool SetChannelDirectMsgEx::processIncoming(ResponseContext& ctx)
{
   #ifdef BEEGFS_DEBUG
      const char* logContext = "SetChannelDirect incoming";
      LOG_DEBUG(logContext, 5, std::string("Value: ") + StringTk::intToStr(getValue() ) );
   #endif // BEEGFS_DEBUG

   ctx.getSocket()->setIsDirect(getValue() );

   Program::getApp()->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
      MetaOpCounter_SETCHANNELDIRECT, getMsgHeaderUserID() );

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/control/SetChannelDirectMsg.h>
class SetChannelDirectMsgEx : public SetChannelDirectMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,213 @@
#include "AdjustChunkPermissionsMsgEx.h"
#include <program/Program.h>
#include <common/net/message/storage/attribs/SetLocalAttrMsg.h>
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
#include <common/storage/striping/Raid0Pattern.h>
#include <components/worker/SetChunkFileAttribsWork.h>
/**
 * Walks the content directories of one hash directory and, for every directory entry that is a
 * file with inlined inode data, pushes the owner (user ID / group ID) stored in the inode to the
 * chunk files on the storage targets.
 *
 * At most getMaxEntries() directory entries are read per request. The response carries the
 * number of entries read, the content directory ID and the offsets reached, plus the number of
 * files for which the attribute update failed.
 *
 * @param ctx response context used to send the AdjustChunkPermissionsRespMsg.
 * @return always true.
 */
bool AdjustChunkPermissionsMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("Incoming AdjustChunkPermissionsMsg");

   MetaStore *metaStore = Program::getApp()->getMetaStore();

   unsigned hashDirNum = this->getHashDirNum();
   unsigned maxEntries = this->getMaxEntries();
   int64_t lastHashDirOffset = this->getLastHashDirOffset();
   int64_t lastContDirOffset = this->getLastContDirOffset();
   std::string currentContDirID = this->getCurrentContDirID();
   int64_t newHashDirOffset = 0;
   int64_t newContDirOffset = 0;
   unsigned errorCount = 0;
   unsigned readOutEntries = 0;

   bool hasNext;

   if ( currentContDirID.empty() )
   {
      // no directory given by the requestor => start with the next one in the hash dir
      hasNext = StorageTkEx::getNextContDirID(hashDirNum, getIsBuddyMirrored(), lastHashDirOffset,
         &currentContDirID, &newHashDirOffset);

      if ( hasNext )
         lastHashDirOffset = newHashDirOffset;
   }
   else
      hasNext = true;

   while ( hasNext )
   {
      std::string parentID = currentContDirID;
      unsigned remainingEntries = maxEntries - readOutEntries;
      StringList entryNames;
      bool parentDirInodeIsTemp = false;

      FileIDLock dirLock;

      if (getIsBuddyMirrored())
         dirLock = {Program::getApp()->getMirroredSessions()->getEntryLockStore(), parentID, false};

      DirInode* parentDirInode = metaStore->referenceDir(parentID, getIsBuddyMirrored(), true);

      // it could be, that parentDirInode does not exist
      // in fsck we create a temporary inode for this case
      if ( unlikely(!parentDirInode) )
      {
         log.log(
            Log_NOTICE,
            "Could not reference directory. EntryID: " + parentID
               + " => using temporary directory inode ");

         // create temporary inode
         int mode = S_IFDIR | S_IRWXU;
         UInt16Vector stripeTargets;
         Raid0Pattern stripePattern(0, stripeTargets, 0);
         parentDirInode = new DirInode(parentID, mode, 0, 0,
            Program::getApp()->getLocalNode().getNumID(), stripePattern, getIsBuddyMirrored());

         parentDirInodeIsTemp = true;
      }

      if ( parentDirInode->listIncremental(lastContDirOffset, remainingEntries, &entryNames,
         &newContDirOffset) == FhgfsOpsErr_SUCCESS )
      {
         lastContDirOffset = newContDirOffset;
         readOutEntries += entryNames.size();
      }
      else
      {
         log.log(Log_WARNING, "Could not list contents of directory. EntryID: " + parentID);
      }

      // actually process the entries; for the dentry part we only need to know if it is a file
      // with inlined inode data
      // (the on-disk dentry path that used to be assembled here for every entry was never used)
      for ( const std::string& entryName : entryNames )
      {
         EntryInfo entryInfo;
         FileInodeStoreData inodeDiskData;

         auto [getEntryRes, isFileOpen] = metaStore->getEntryData(parentDirInode, entryName,
            &entryInfo, &inodeDiskData);
         inodeDiskData.setDynamicOrigParentEntryID(parentID);

         if (getEntryRes == FhgfsOpsErr_SUCCESS ||
             getEntryRes == FhgfsOpsErr_DYNAMICATTRIBSOUTDATED )
         {
            DirEntryType entryType = entryInfo.getEntryType();

            // we only care if inode data is present
            if ( (DirEntryType_ISFILE(entryType)) && (entryInfo.getIsInlined() ) )
            {
               const std::string& inodeID = inodeDiskData.getEntryID();
               unsigned userID = inodeDiskData.getInodeStatData()->getUserID();
               unsigned groupID = inodeDiskData.getInodeStatData()->getGroupID();
               StripePattern* pattern = inodeDiskData.getStripePattern();

               PathInfo pathInfo;
               inodeDiskData.getPathInfo(&pathInfo);

               if ( !this->sendSetAttrMsg(inodeID, userID, groupID, &pathInfo, pattern) )
                  errorCount++;
            }
         }
         else
         {
            log.log(Log_WARNING, "Unable to create dir entry from entry with name " + entryName
               + " in directory with ID " + parentID);
         }
      }

      // a temporary inode is owned by us, a referenced one belongs to the meta store
      if ( parentDirInodeIsTemp )
         SAFE_DELETE(parentDirInode);
      else
         metaStore->releaseDir(parentID);

      if ( entryNames.size() < remainingEntries )
      {
         // directory is at the end => proceed with next
         hasNext = StorageTkEx::getNextContDirID(hashDirNum, getIsBuddyMirrored(),
            lastHashDirOffset, &currentContDirID, &newHashDirOffset);

         if ( hasNext )
         {
            lastHashDirOffset = newHashDirOffset;
            lastContDirOffset = 0;
         }
      }
      else
      {
         // there are more to come, but we need to exit the loop now, because maxCount is reached
         hasNext = false;
      }
   }

   ctx.sendResponse(
      AdjustChunkPermissionsRespMsg(readOutEntries, currentContDirID, lastHashDirOffset,
         lastContDirOffset, errorCount) );

   return true;
}
bool AdjustChunkPermissionsMsgEx::sendSetAttrMsg(const std::string& entryID, unsigned userID,
unsigned groupID, PathInfo* pathInfo, StripePattern* pattern)
{
const char* logContext = "AdjustChunkPermissionsMsgEx::sendSetAttrMsg";
MultiWorkQueue* slaveQueue = Program::getApp()->getCommSlaveQueue();
int validAttribs = SETATTR_CHANGE_USERID | SETATTR_CHANGE_GROUPID; // only interested in these
SettableFileAttribs attribs;
attribs.userID = userID;
attribs.groupID = groupID;
const UInt16Vector* stripeTargets = pattern->getStripeTargetIDs();
size_t numTargetWorks = stripeTargets->size();
FhgfsOpsErrVec nodeResults(numTargetWorks);
SynchronizedCounter counter;
// generate work for storage targets...
for(size_t i=0; i < numTargetWorks; i++)
{
SetChunkFileAttribsWork* work = new SetChunkFileAttribsWork(
entryID, validAttribs, &attribs, false, pattern, (*stripeTargets)[i], pathInfo,
NULL, &(nodeResults[i]), &counter);
work->setQuotaChown(true);
work->setMsgUserID(getMsgHeaderUserID() );
slaveQueue->addDirectWork(work);
}
// wait for work completion...
counter.waitForCount(numTargetWorks);
// check target results...
for(size_t i=0; i < numTargetWorks; i++)
{
if(unlikely(nodeResults[i] != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).log(Log_WARNING,
"Problems occurred during setting of chunk file attribs. "
"fileID: " + entryID );
return false;
}
}
return true;
}

View File

@@ -0,0 +1,17 @@
#pragma once
#include <common/net/message/fsck/AdjustChunkPermissionsMsg.h>
#include <common/net/message/fsck/AdjustChunkPermissionsRespMsg.h>
#include <common/storage/PathInfo.h>
#include <common/storage/striping/StripePattern.h>
class AdjustChunkPermissionsMsgEx : public AdjustChunkPermissionsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
bool sendSetAttrMsg(const std::string& entryID, unsigned userID, unsigned groupID,
PathInfo* pathInfo, StripePattern* pattern);
};

View File

@@ -0,0 +1,38 @@
#include <program/Program.h>
#include <common/net/message/fsck/CheckAndRepairDupInodeRespMsg.h>
#include "CheckAndRepairDupInodeMsgEx.h"
/**
 * Runs MetaStore::checkAndRepairDupFileInode() for every duplicate inode listed in the request
 * and answers with the IDs of the inodes for which that did not succeed.
 *
 * An inode whose parent directory cannot be referenced is reported as failed as well.
 *
 * @param ctx response context used to send the CheckAndRepairDupInodeRespMsg.
 * @return always true.
 */
bool CheckAndRepairDupInodeMsgEx::processIncoming(ResponseContext& ctx)
{
   MetaStore* metaStore = Program::getApp()->getMetaStore();
   EntryLockStore* entryLockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   StringList failedIDList;

   for (const auto& inode : getDuplicateInodes())
   {
      const std::string& entryID = inode.getID();
      const std::string& parentEntryID = inode.getParentDirID();
      const bool isBuddyMirrored = inode.getIsBuddyMirrored();

      // both locks are held until the end of this iteration
      FileIDLock dirLock = {entryLockStore, parentEntryID, true};
      FileIDLock fileLock = {entryLockStore, entryID, true};

      DirInode* parentDir = metaStore->referenceDir(parentEntryID, isBuddyMirrored, true);
      if (!parentDir)
      {
         // the parent is dereferenced below, so without it nothing can be repaired
         failedIDList.push_back(entryID);
         continue;
      }

      EntryInfo fileInfo(NumNodeID(0), parentEntryID, entryID, std::string(""),
         DirEntryType_REGULARFILE, 0);
      fileInfo.setBuddyMirroredFlag(isBuddyMirrored);

      FhgfsOpsErr repairRes = metaStore->checkAndRepairDupFileInode(*parentDir, &fileInfo);
      if (repairRes != FhgfsOpsErr_SUCCESS)
         failedIDList.push_back(entryID);

      metaStore->releaseDir(parentDir->getID());
   }

   ctx.sendResponse(CheckAndRepairDupInodeRespMsg(std::move(failedIDList)));
   return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/fsck/CheckAndRepairDupInodeMsg.h>
class CheckAndRepairDupInodeMsgEx : public CheckAndRepairDupInodeMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,60 @@
#include "CreateDefDirInodesMsgEx.h"
#include <program/Program.h>
#include <toolkit/BuddyCommTk.h>
/**
 * Creates a directory inode with default values (mode S_IFDIR | S_IRWXU, owned by root, RAID0
 * pattern built from the configured default chunk size and number of stripe targets) for every
 * inode ID in the request.
 *
 * The response lists the IDs that could not be stored and an FsckDirInode for every inode that
 * was created.
 *
 * @param ctx response context used to send the CreateDefDirInodesRespMsg.
 * @return always true.
 */
bool CreateDefDirInodesMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();
   Config* cfg = app->getConfig();

   StringList failedInodeIDs;
   FsckDirInodeList createdInodes;

   for (const auto& item : items)
   {
      const std::string& inodeID = std::get<0>(item);
      const bool isBuddyMirrored = std::get<1>(item);

      int mode = S_IFDIR | S_IRWXU;
      unsigned userID = 0; // root
      unsigned groupID = 0; // root

      // mirrored inodes are owned by the local buddy group, others by the local node
      const NumNodeID ownerNodeID = isBuddyMirrored
         ? NumNodeID(app->getMetaBuddyGroupMapper()->getLocalGroupID())
         : app->getLocalNode().getNumID();

      UInt16Vector stripeTargets;
      unsigned defaultChunkSize = cfg->getTuneDefaultChunkSize();
      unsigned defaultNumStripeTargets = cfg->getTuneDefaultNumStripeTargets();
      Raid0Pattern stripePattern(defaultChunkSize, stripeTargets, defaultNumStripeTargets);

      // we try to create a new directory inode, with default values
      FileIDLock dirLock;

      if (isBuddyMirrored)
         dirLock = {app->getMirroredSessions()->getEntryLockStore(), inodeID, true};

      DirInode dirInode(inodeID, mode, userID, groupID, ownerNodeID, stripePattern,
         isBuddyMirrored);

      if ( dirInode.storeAsReplacementFile(inodeID) == FhgfsOpsErr_SUCCESS )
      {
         // try to refresh the metainfo (maybe a .cont directory was already present)
         dirInode.refreshMetaInfo();

         StatData statData;
         dirInode.getStatData(statData);

         FsckDirInode fsckDirInode(inodeID, "", NumNodeID(), ownerNodeID, statData.getFileSize(),
            statData.getNumHardlinks(), stripeTargets, FsckStripePatternType_RAID0,
            ownerNodeID, isBuddyMirrored, true, false);

         createdInodes.push_back(fsckDirInode);
      }
      else
         failedInodeIDs.push_back(inodeID);
   }

   ctx.sendResponse(CreateDefDirInodesRespMsg(&failedInodeIDs, &createdInodes) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/storage/striping/Raid0Pattern.h>
#include <common/net/message/fsck/CreateDefDirInodesMsg.h>
#include <common/net/message/fsck/CreateDefDirInodesRespMsg.h>
class CreateDefDirInodesMsgEx : public CreateDefDirInodesMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,77 @@
#include "CreateEmptyContDirsMsgEx.h"
#include <program/Program.h>
#include <toolkit/BuddyCommTk.h>
/**
 * Creates an empty contents directory (including its dirEntryID subdirectory) for every
 * directory ID in the request and refreshes the meta info of the directory inode afterwards.
 *
 * A directory ID is reported as failed if one of the two mkdir() calls fails or if the directory
 * inode cannot be referenced. A failed meta info refresh is only logged.
 *
 * @param ctx response context used to send the CreateEmptyContDirsRespMsg.
 * @return always true.
 */
bool CreateEmptyContDirsMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "CreateEmptyContDirsMsg incoming";

   App* app = Program::getApp();
   MetaStore* metaStore = app->getMetaStore();

   StringList failedIDs;

   for (const auto& item : items)
   {
      const std::string& dirID = std::get<0>(item);
      const bool isBuddyMirrored = std::get<1>(item);

      const std::string dentriesBasePath = isBuddyMirrored
         ? app->getBuddyMirrorDentriesPath()->str()
         : app->getDentriesPath()->str();

      std::string contentsDirStr = MetaStorageTk::getMetaDirEntryPath(dentriesBasePath, dirID);

      // step 1: the contents directory itself
      if ( mkdir(contentsDirStr.c_str(), 0755) != 0 )
      { // error
         LOG(GENERAL, ERR, "Unable to create contents directory.", contentsDirStr, sysErr);
         failedIDs.push_back(dirID);
         continue;
      }

      // step 2: the dirEntryID directory, which allows access to inlined inodes via dirID access
      std::string contentsDirIDStr = MetaStorageTk::getMetaDirEntryIDPath(contentsDirStr);

      if ( mkdir(contentsDirIDStr.c_str(), 0755) != 0 )
      { // error
         LOG(GENERAL, ERR, "Unable to create dirEntryID directory.", contentsDirIDStr, sysErr);
         failedIDs.push_back(dirID);
         continue;
      }

      // step 3: update the dir attribs (lock is held until the end of this iteration)
      FileIDLock lock;

      if (isBuddyMirrored)
         lock = {Program::getApp()->getMirroredSessions()->getEntryLockStore(), dirID, true};

      DirInode* dirInode = metaStore->referenceDir(dirID, isBuddyMirrored, true);

      if (!dirInode)
      {
         LOG(GENERAL, ERR, "Unable to reference directory.", dirID);
         failedIDs.push_back(dirID);
         continue;
      }

      if (dirInode->refreshMetaInfo() != FhgfsOpsErr_SUCCESS)
      {
         // not reported as failed, the directories themselves have been created
         LogContext(logContext).log(Log_NOTICE, "Unable to refresh contents directory metadata: "
            + contentsDirStr + ". " + "SysErr: " + System::getErrString());
      }

      metaStore->releaseDir(dirID);
   }

   ctx.sendResponse(CreateEmptyContDirsRespMsg(&failedIDs) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/storage/striping/Raid0Pattern.h>
#include <common/net/message/fsck/CreateEmptyContDirsMsg.h>
#include <common/net/message/fsck/CreateEmptyContDirsRespMsg.h>
class CreateEmptyContDirsMsgEx : public CreateEmptyContDirsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,66 @@
#include "DeleteDirEntriesMsgEx.h"
#include <program/Program.h>
#include <common/fsck/FsckDirEntry.h>
#include <toolkit/BuddyCommTk.h>
#include <boost/lexical_cast.hpp>
/**
 * Deletes the directory entries listed in the request from their parent directories and answers
 * with the entries that could not be deleted.
 *
 * Directory-type entries are removed via DirInode::removeDir(), all others via
 * DirInode::unlinkDirEntry() with DirEntry_UNLINK_ID_AND_FILENAME.
 *
 * @param ctx response context used to send the DeleteDirEntriesRespMsg.
 * @return always true.
 */
bool DeleteDirEntriesMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("DeleteDirEntriesMsgEx");

   MetaStore* metaStore = Program::getApp()->getMetaStore();
   EntryLockStore* entryLockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   FsckDirEntryList failedEntries;

   for ( auto& entry : getEntries() )
   {
      const std::string& parentID = entry.getParentDirID();
      const std::string& entryName = entry.getName();
      FsckDirEntryType dirEntryType = entry.getEntryType();

      // locks are only taken for buddy mirrored entries; held until the end of this iteration
      FileIDLock dirLock;
      ParentNameLock dentryLock;

      if (entry.getIsBuddyMirrored())
      {
         dirLock = {entryLockStore, parentID, true};
         dentryLock = {entryLockStore, parentID, entryName};
      }

      DirInode* parentDirInode = metaStore->referenceDir(parentID, entry.getIsBuddyMirrored(),
         true);

      if (!parentDirInode)
      {
         log.log(3,"Failed to delete directory entry; ParentID: " + parentID + "; EntryName: " +
            entryName + " - ParentID does not exist");
         failedEntries.push_back(entry);
         continue;
      }

      const FhgfsOpsErr unlinkRes = FsckDirEntryType_ISDIR(dirEntryType)
         ? parentDirInode->removeDir(entryName, NULL)
         : parentDirInode->unlinkDirEntry(entryName, NULL, DirEntry_UNLINK_ID_AND_FILENAME);

      metaStore->releaseDir(parentID);

      if (unlinkRes == FhgfsOpsErr_SUCCESS)
         continue;

      log.logErr("Failed to delete directory entry; ParentID: " + parentID + "; EntryName: " +
         entryName + "; Err: " + boost::lexical_cast<std::string>(unlinkRes));
      failedEntries.push_back(entry);
   }

   ctx.sendResponse(DeleteDirEntriesRespMsg(&failedEntries) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/DeleteDirEntriesMsg.h>
#include <common/net/message/fsck/DeleteDirEntriesRespMsg.h>
class DeleteDirEntriesMsgEx : public DeleteDirEntriesMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,77 @@
#include "FixInodeOwnersInDentryMsgEx.h"
#include <common/storage/striping/Raid0Pattern.h>
#include <common/fsck/FsckDirEntry.h>
#include <program/Program.h>
#include <toolkit/BuddyCommTk.h>
/**
 * Sets the owner node ID stored in each directory entry of the request. The n-th entry of the
 * dentry list gets the n-th element of the owner list.
 *
 * If the parent directory inode cannot be referenced, a temporary inode is used so that the
 * dentry can still be modified. Entries that could not be updated are returned in the response.
 * If the request carries fewer owners than dentries, the dentries without an owner are reported
 * as failed.
 *
 * @param ctx response context used to send the FixInodeOwnersInDentryRespMsg.
 * @return always true.
 */
bool FixInodeOwnersInDentryMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("FixInodeOwnersInDentryMsgEx");

   MetaStore* metaStore = Program::getApp()->getMetaStore();
   EntryLockStore* entryLockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   FsckDirEntryList& dentries = getDentries();
   auto&& owners = getOwners();
   FsckDirEntryList failedEntries;

   auto ownerIter = owners.begin();

   for (FsckDirEntryListIter dentryIter = dentries.begin(); dentryIter != dentries.end();
      ++dentryIter, ++ownerIter)
   {
      // both lists come from the network, so their lengths are not guaranteed to match; never
      // dereference the owner iterator past the end of its list
      if (unlikely(ownerIter == owners.end() ) )
      {
         log.log(Log_WARNING, "Received fewer owner node IDs than directory entries. "
            "Remaining entries are not updated.");
         failedEntries.insert(failedEntries.end(), dentryIter, dentries.end() );
         break;
      }

      const std::string& parentID = dentryIter->getParentDirID();
      const std::string& entryName = dentryIter->getName();

      ParentNameLock lock;

      if (dentryIter->getIsBuddyMirrored())
         lock = {entryLockStore, parentID, entryName};

      bool parentDirInodeIsTemp = false;
      DirInode* parentDirInode = metaStore->referenceDir(parentID,
         dentryIter->getIsBuddyMirrored(), true);

      // it could be, that parentDirInode does not exist
      // in fsck we create a temporary inode for this case, so that we can modify the dentry
      // hopefully, the inode itself will get fixed later
      if (unlikely(!parentDirInode))
      {
         log.log(Log_NOTICE,
            "Failed to update directory entry. Parent directory could not be "
            "referenced. parentID: " + parentID + " entryName: " + entryName
            + " => Using temporary inode");

         // create temporary inode
         int mode = S_IFDIR | S_IRWXU;
         UInt16Vector stripeTargets;
         Raid0Pattern stripePattern(0, stripeTargets, 0);
         parentDirInode = new DirInode(parentID, mode, 0, 0,
            Program::getApp()->getLocalNode().getNumID(), stripePattern,
            dentryIter->getIsBuddyMirrored());

         parentDirInodeIsTemp = true;
      }

      FhgfsOpsErr updateRes = parentDirInode->setOwnerNodeID(entryName, *ownerIter);

      if (updateRes != FhgfsOpsErr_SUCCESS )
      {
         log.log(Log_WARNING, "Failed to update directory entry. parentID: " + parentID +
            " entryName: " + entryName);
         failedEntries.push_back(*dentryIter);
      }

      // a temporary inode is owned by us, a referenced one belongs to the meta store
      if (parentDirInodeIsTemp)
         SAFE_DELETE(parentDirInode);
      else
         metaStore->releaseDir(parentID);
   }

   ctx.sendResponse(FixInodeOwnersInDentryRespMsg(&failedEntries) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/FixInodeOwnersInDentryMsg.h>
#include <common/net/message/fsck/FixInodeOwnersInDentryRespMsg.h>
class FixInodeOwnersInDentryMsgEx : public FixInodeOwnersInDentryMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,51 @@
#include "FixInodeOwnersMsgEx.h"
#include <common/fsck/FsckDirEntry.h>
#include <program/Program.h>
#include <toolkit/BuddyCommTk.h>
/**
 * Sets the owner node ID of every directory inode in the request to the owner carried by the
 * corresponding FsckDirInode and answers with the inodes that could not be updated.
 *
 * An inode that cannot be referenced counts as not updated and is reported as failed, too.
 *
 * @param ctx response context used to send the FixInodeOwnersRespMsg.
 * @return always true.
 */
bool FixInodeOwnersMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "FixInodeOwnersMsgEx incoming";

   MetaStore* metaStore = Program::getApp()->getMetaStore();
   EntryLockStore* entryLockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   FsckDirInodeList& inodes = getInodes();
   FsckDirInodeList failedInodes;

   for ( FsckDirInodeListIter iter = inodes.begin(); iter != inodes.end(); ++iter )
   {
      const std::string& entryID = iter->getID();
      NumNodeID ownerNodeID = iter->getOwnerNodeID();

      FileIDLock lock;

      if (iter->getIsBuddyMirrored())
         lock = {entryLockStore, entryID, true};

      DirInode* dirInode = metaStore->referenceDir(entryID, iter->getIsBuddyMirrored(), true);

      if (unlikely(!dirInode))
      {
         LogContext(logContext).log(Log_WARNING, "Failed to update directory inode. Inode could"
            " not be referenced. entryID: " + entryID);

         // the owner was not changed, so the requestor must not assume that it was
         failedInodes.push_back(*iter);
         continue; // continue to next entry
      }

      bool updateRes = dirInode->setOwnerNodeID(ownerNodeID);

      metaStore->releaseDir(entryID);

      if (!updateRes)
      {
         LogContext(logContext).log(Log_WARNING, "Failed to update directory inode. entryID: "
            + entryID);
         failedInodes.push_back(*iter);
      }
   }

   ctx.sendResponse(FixInodeOwnersRespMsg(&failedInodes) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/FixInodeOwnersMsg.h>
#include <common/net/message/fsck/FixInodeOwnersRespMsg.h>
class FixInodeOwnersMsgEx : public FixInodeOwnersMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,34 @@
#include <components/ModificationEventFlusher.h>
#include <program/Program.h>
#include "FsckSetEventLoggingMsgEx.h"
/**
 * Enables or disables event logging on the ModificationEventFlusher, depending on the
 * "enable logging" flag of the request, and answers with the outcome.
 *
 * @param ctx response context used to send the FsckSetEventLoggingRespMsg.
 * @return always true.
 */
bool FsckSetEventLoggingMsgEx::processIncoming(ResponseContext& ctx)
{
   ModificationEventFlusher* flusher = Program::getApp()->getModificationEventFlusher();

   bool result;
   bool loggingEnabled;
   bool missedEvents;

   if (this->getEnableLogging() )
   {
      loggingEnabled = flusher->enableLogging(getPortUDP(), getNicList(), getForceRestart());
      result = true; // (always true when logging is enabled)
      missedEvents = true; // (value ignored when logging is enabled)
   }
   else
   { // disable logging
      result = flusher->disableLogging();
      loggingEnabled = false; // (value ignored when logging is disabled)
      missedEvents = flusher->getFsckMissedEvent();
   }

   ctx.sendResponse(FsckSetEventLoggingRespMsg(result, loggingEnabled, missedEvents));

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/fsck/FsckSetEventLoggingMsg.h>
#include <common/net/message/fsck/FsckSetEventLoggingRespMsg.h>
class FsckSetEventLoggingMsgEx : public FsckSetEventLoggingMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,108 @@
#include "LinkToLostAndFoundMsgEx.h"
#include <common/net/message/fsck/RemoveInodesMsg.h>
#include <common/net/message/fsck/RemoveInodesRespMsg.h>
#include <program/Program.h>
#include <toolkit/BuddyCommTk.h>
/**
 * Links the directory inodes of the request into the lost+found directory and answers with the
 * inodes that failed and the directory entries that were created.
 *
 * Only requests with a directory entry type are handled; any other entry type is logged as an
 * error and no response is sent.
 *
 * @param ctx response context used to send the LinkToLostAndFoundRespMsg.
 * @return true if the request was handled, false for a non-directory entry type.
 */
bool LinkToLostAndFoundMsgEx::processIncoming(ResponseContext& ctx)
{
   if (FsckDirEntryType_ISDIR(this->getEntryType()))
   {
      FsckDirInodeList failedInodes;
      FsckDirEntryList createdDirEntries;

      linkDirInodes(&failedInodes, &createdDirEntries);

      ctx.sendResponse(LinkToLostAndFoundRespMsg(&failedInodes, &createdDirEntries) );

      return true;
   }

   // everything that is not a directory is rejected
   LOG(COMMUNICATION, ERR, "LinkToLostAndFoundMsg received for non-inlined file inode.",
      ("from", ctx.peerName()));

   return false;
}
/**
 * Creates a directory entry in the lost+found directory for every directory inode of the
 * request. The entry ID of the inode is used as the name of the new entry.
 *
 * If the lost+found directory cannot be referenced, all inodes are reported as failed.
 *
 * @param outFailedInodes receives the inodes for which no directory entry could be created.
 * @param outCreatedDirEntries receives an FsckDirEntry for every directory entry created.
 */
void LinkToLostAndFoundMsgEx::linkDirInodes(FsckDirInodeList* outFailedInodes,
   FsckDirEntryList* outCreatedDirEntries)
{
   const char* logContext = "LinkToLostAndFoundMsgEx (linkDirInodes)";

   NumNodeID localNodeNumID = Program::getApp()->getLocalNode().getNumID();

   FsckDirInodeList& dirInodes = getDirInodes();

   MetaStore* metaStore = Program::getApp()->getMetaStore();
   EntryLockStore* entryLockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   EntryInfo* lostAndFoundInfo = this->getLostAndFoundInfo();
   DirInode* lostAndFoundDir = metaStore->referenceDir(lostAndFoundInfo->getEntryID(),
      lostAndFoundInfo->getIsBuddyMirrored(), true);

   if ( !lostAndFoundDir )
   {
      *outFailedInodes = dirInodes;
      return;
   }

   for ( FsckDirInodeListIter iter = dirInodes.begin(); iter != dirInodes.end(); ++iter )
   {
      const std::string& entryID = iter->getID();
      NumNodeID ownerNodeID = iter->getOwnerNodeID();
      DirEntryType entryType = DirEntryType_DIRECTORY;
      DirEntry newDirEntry(entryType, entryID, entryID, ownerNodeID);

      FileIDLock lock;

      if (iter->getIsBuddyMirrored())
      {
         lock = {entryLockStore, entryID, true};
         newDirEntry.setBuddyMirrorFeatureFlag();
      }

      // keep the result as an error code: it is compared against FhgfsOpsErr_SUCCESS below, and
      // squeezing it into a bool would make that comparison rely on implicit conversions
      const FhgfsOpsErr makeRes = lostAndFoundDir->makeDirEntry(newDirEntry);

      // stat the new file to get device and inode information
      std::string filename = MetaStorageTk::getMetaDirEntryPath(
         lostAndFoundInfo->getIsBuddyMirrored()
            ? Program::getApp()->getBuddyMirrorDentriesPath()->str()
            : Program::getApp()->getDentriesPath()->str(),
         lostAndFoundInfo->getEntryID()) + "/" + entryID;

      struct stat statBuf;

      int statRes = stat(filename.c_str(), &statBuf);

      int saveDevice;
      uint64_t saveInode;

      if ( likely(!statRes) )
      {
         saveDevice = statBuf.st_dev;
         saveInode = statBuf.st_ino;
      }
      else
      {
         // fall back to 0 for both values
         saveDevice = 0;
         saveInode = 0;
         LogContext(logContext).log(Log_CRITICAL,
            "Could not stat dir entry file; entryID: " + entryID + ";filename: " + filename);
      }

      if ( makeRes != FhgfsOpsErr_SUCCESS )
         outFailedInodes->push_back(*iter);
      else
      {
         std::string parentID = lostAndFoundInfo->getEntryID();
         FsckDirEntry newFsckDirEntry(entryID, entryID, parentID, localNodeNumID,
            ownerNodeID, FsckDirEntryType_DIRECTORY, false, localNodeNumID,
            saveDevice, saveInode, lostAndFoundInfo->getIsBuddyMirrored());
         outCreatedDirEntries->push_back(newFsckDirEntry);
      }
   }

   lostAndFoundDir->refreshMetaInfo();
   metaStore->releaseDir(lostAndFoundInfo->getEntryID() );
}

View File

@@ -0,0 +1,24 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/LinkToLostAndFoundMsg.h>
#include <common/net/message/fsck/LinkToLostAndFoundRespMsg.h>
#include <common/net/message/storage/creating/MkDirMsg.h>
#include <common/net/message/storage/creating/MkDirRespMsg.h>
#include <common/storage/StorageErrors.h>
#include <dirent.h>
class LinkToLostAndFoundMsgEx : public LinkToLostAndFoundMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
void linkDirInodes(FsckDirInodeList* outFailedInodes, FsckDirEntryList* outCreatedDirEntries);
void linkFileInodes(FsckFileInodeList* outFailedInodes,
FsckDirEntryList* outCreatedDirEntries);
FhgfsOpsErr deleteInode(std::string& entryID, uint16_t ownerNodeID);
};

View File

@@ -0,0 +1,141 @@
#include "RecreateDentriesMsgEx.h"
#include <common/fsck/FsckDirEntry.h>
#include <common/fsck/FsckFsID.h>
#include <program/Program.h>
#include <toolkit/BuddyCommTk.h>
/**
 * Recreates dentry-by-name files from existing dentry-by-id files.
 *
 * For every FsckFsID of the request, the dentry-by-id file is hard-linked into the parent's
 * contents directory, using the entry ID as the name. The new dentry is then read back to build
 * an FsckDirEntry and, if inlined inode data is present, an FsckFileInode.
 *
 * The response carries the IDs that failed, the created dentries and the created inodes.
 *
 * @param ctx response context used to send the RecreateDentriesRespMsg.
 * @return always true.
 */
bool RecreateDentriesMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("RecreateDentriesMsgEx");

   App* app = Program::getApp();
   MetaStore* metaStore = app->getMetaStore();
   EntryLockStore* entryLockStore = app->getMirroredSessions()->getEntryLockStore();

   FsckFsIDList& fsIDs = getFsIDs();

   FsckFsIDList failedCreates;
   FsckDirEntryList createdDentries;
   FsckFileInodeList createdInodes;

   for ( auto& fsID : fsIDs )
   {
      const bool isMirrored = fsID.getIsBuddyMirrored();
      const std::string& entryID = fsID.getID();
      const std::string& dirID = fsID.getParentDirID();

      NumNodeID localNodeID = isMirrored
         ? NumNodeID(app->getMetaBuddyGroupMapper()->getLocalGroupID())
         : app->getLocalNodeNumID();

      std::string parentPath = MetaStorageTk::getMetaDirEntryPath(
         isMirrored
            ? app->getBuddyMirrorDentriesPath()->str()
            : app->getDentriesPath()->str(), dirID);

      // link source: the dentry-by-id file
      std::string dirEntryIDFilePath = MetaStorageTk::getMetaDirEntryIDPath(parentPath) + "/"
         + entryID;

      // link destination: the name is lost, so we take the ID as new name
      std::string dirEntryNameFilePath = parentPath + "/" + entryID;

      // before we link, let's see if we can open the parent dir, otherwise we should not mess
      // around here
      FileIDLock dirLock;
      ParentNameLock dentryLock;

      if (isMirrored)
      {
         dirLock = {entryLockStore, dirID, true};
         dentryLock = {entryLockStore, dirID, entryID};
      }

      DirInode* parentDirInode = metaStore->referenceDir(dirID, isMirrored, false);

      if (!parentDirInode)
      {
         log.logErr("Unable to reference parent directory; ID: " + dirID);
         failedCreates.push_back(fsID);
         continue;
      }

      // link the dentry-by-name file
      if ( link(dirEntryIDFilePath.c_str(), dirEntryNameFilePath.c_str()) )
      {
         // error occurred while linking
         log.logErr(
            "Failed to link dentry file; ParentID: " + dirID + "; ID: "
               + entryID);
         failedCreates.push_back(fsID);
         metaStore->releaseDir(dirID);
         continue;
      }

      // linking was OK => gather dentry (and inode) data, so fsck can add it
      DirEntry dirEntry(entryID);

      if ( !parentDirInode->getDentry(entryID, dirEntry) )
      {
         log.logErr(
            "Could not read the created dentry file; ParentID: " + dirID + "; ID: "
               + entryID);
         failedCreates.push_back(fsID);
         metaStore->releaseDir(dirID);
         continue;
      }

      // create the FsckDirEntry
      FsckDirEntry fsckDirEntry(dirEntry.getID(), dirEntry.getName(), dirID,
         localNodeID, localNodeID,
         FsckTk::DirEntryTypeToFsckDirEntryType(dirEntry.getEntryType()), true, localNodeID,
         fsID.getSaveDevice(), fsID.getSaveInode(), isMirrored);
      createdDentries.push_back(fsckDirEntry);

      // inlined inode data should be present, because otherwise dentry-by-id file would not
      // exist, and we could not get this far
      FileInodeStoreData* inodeData = dirEntry.getInodeStoreData();

      if ( !inodeData )
      {
         log.logErr(
            "No inlined inode data found; parentID: " + dirID + "; ID: "
               + entryID);
      }
      else
      {
         const int pathInfoFlags = (inodeData->getOrigFeature() == FileInodeOrigFeature_TRUE)
            ? PATHINFO_FEATURE_ORIG
            : PATHINFO_FEATURE_ORIG_UNKNOWN;

         PathInfo pathInfo(inodeData->getOrigUID(), inodeData->getOrigParentEntryID(),
            pathInfoFlags);

         // chunkSize and targetIDs are filled in by the conversion call
         UInt16Vector targetIDs;
         unsigned chunkSize;
         FsckStripePatternType fsckStripePatternType = FsckTk::stripePatternToFsckStripePattern(
            inodeData->getPattern(), &chunkSize, &targetIDs);

         FsckFileInode fsckFileInode(inodeData->getEntryID(), dirID,
            localNodeID, pathInfo, inodeData->getInodeStatData(),
            inodeData->getInodeStatData()->getNumBlocks(), targetIDs, fsckStripePatternType,
            chunkSize, localNodeID, fsID.getSaveInode(), fsID.getSaveDevice(), true,
            isMirrored, true, false);
         createdInodes.push_back(fsckFileInode);
      }

      metaStore->releaseDir(dirID);
   }

   ctx.sendResponse(RecreateDentriesRespMsg(&failedCreates, &createdDentries, &createdInodes) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/RecreateDentriesMsg.h>
#include <common/net/message/fsck/RecreateDentriesRespMsg.h>
// Meta-server side handler for the fsck RecreateDentriesMsg.
// Only the request processing is added here; everything else is inherited from the base message.
class RecreateDentriesMsgEx : public RecreateDentriesMsg
{
public:
// Processes one incoming request and answers through ctx (implemented in the .cpp file).
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,65 @@
#include "RecreateFsIDsMsgEx.h"
#include <common/fsck/FsckDirEntry.h>
#include <program/Program.h>
#include <toolkit/BuddyCommTk.h>
/**
 * Re-creates the dentry-by-id hard link for every directory entry contained in the request.
 *
 * Per entry, the parent directory, the (parent, name) pair and the entry ID are locked, a
 * possibly existing dentry-by-id file is unlinked (a missing file is not an error) and a new
 * hard link to the dentry-by-name file is created. Entries for which one of the two steps
 * fails are collected and returned to the sender in the RecreateFsIDsRespMsg.
 *
 * @param ctx response context used to send the reply
 * @return always true
 */
bool RecreateFsIDsMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("RecreateFsIDsMsgEx");
   EntryLockStore* lockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   FsckDirEntryList failedEntries;

   for (auto& entry : getEntries())
   {
      const std::string& parentID = entry.getParentDirID();
      const std::string& entryName = entry.getName();
      const std::string& entryID = entry.getID();

      // the dentries root depends on whether the entry is buddy mirrored
      const std::string dentriesRoot = entry.getIsBuddyMirrored()
         ? Program::getApp()->getBuddyMirrorDentriesPath()->str()
         : Program::getApp()->getDentriesPath()->str();

      const std::string dirEntryPath = MetaStorageTk::getMetaDirEntryPath(dentriesRoot, parentID);
      const std::string idFilePath =
         MetaStorageTk::getMetaDirEntryIDPath(dirEntryPath) + "/" + entryID;
      const std::string nameFilePath = dirEntryPath + "/" + entryName;

      // locks are held until the end of this loop iteration
      FileIDLock dirLock(lockStore, parentID, true);
      ParentNameLock dentryLock(lockStore, parentID, entryName);
      FileIDLock fileLock(lockStore, entryID, true);

      // logs the failure reason and remembers the entry for the response
      auto markFailed = [&](const char* reason)
      {
         log.logErr(
            "Failed to recreate dentry-by-id file for directory entry; ParentID: " + parentID
            + "; EntryName: " + entryName + reason);
         failedEntries.push_back(entry);
      };

      // drop the old dentry-by-id link; it is fine if there was none
      if (unlink(idFilePath.c_str()) != 0 && errno != ENOENT)
      {
         markFailed(" - Could not delete old, faulty dentry-by-id file link");
         continue;
      }

      // create the new link pointing at the dentry-by-name file
      if (link(nameFilePath.c_str(), idFilePath.c_str()) != 0)
         markFailed(" - File could not be linked");
   }

   ctx.sendResponse(RecreateFsIDsRespMsg(&failedEntries) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/RecreateFsIDsMsg.h>
#include <common/net/message/fsck/RecreateFsIDsRespMsg.h>
// Meta-server side handler for the fsck RecreateFsIDsMsg.
// Only the request processing is added here; everything else is inherited from the base message.
class RecreateFsIDsMsgEx : public RecreateFsIDsMsg
{
public:
// Processes one incoming request and answers through ctx (implemented in the .cpp file).
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,44 @@
#include <program/Program.h>
#include <common/net/message/fsck/RemoveInodesRespMsg.h>
#include <common/toolkit/ZipIterator.h>
#include <toolkit/BuddyCommTk.h>
#include "RemoveInodesMsgEx.h"
/**
 * Removes every inode listed in the request.
 *
 * Each item is a tuple of (entryID, entryType, isBuddyMirrored). The entry ID is write-locked
 * for the duration of the removal; directories are removed via removeDirInode(), everything
 * else via fsckUnlinkFileInode(). IDs whose removal did not succeed are sent back in the
 * RemoveInodesRespMsg.
 *
 * @param ctx response context used to send the reply
 * @return always true
 */
bool RemoveInodesMsgEx::processIncoming(ResponseContext& ctx)
{
   MetaStore* metaStore = Program::getApp()->getMetaStore();
   EntryLockStore* lockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   StringList failedIDList;

   for (auto&& item : items)
   {
      const std::string& entryID = std::get<0>(item);
      const DirEntryType entryType = std::get<1>(item);
      const bool isBuddyMirrored = std::get<2>(item);

      // same lock for both inode kinds; released at the end of this iteration
      FileIDLock idLock(lockStore, entryID, true);

      const FhgfsOpsErr rmRes = (entryType == DirEntryType_DIRECTORY)
         ? metaStore->removeDirInode(entryID, isBuddyMirrored)
         : metaStore->fsckUnlinkFileInode(entryID, isBuddyMirrored);

      if (rmRes != FhgfsOpsErr_SUCCESS)
         failedIDList.push_back(entryID);
   }

   ctx.sendResponse(RemoveInodesRespMsg(std::move(failedIDList)));

   return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/fsck/RemoveInodesMsg.h>
// Meta-server side handler for the fsck RemoveInodesMsg.
// Only the request processing is added here; everything else is inherited from the base message.
class RemoveInodesMsgEx : public RemoveInodesMsg
{
public:
// Processes one incoming request and answers through ctx (implemented in the .cpp file).
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,269 @@
#include "RetrieveDirEntriesMsgEx.h"
#include <common/storage/striping/Raid0Pattern.h>
#include <net/msghelpers/MsgHelperStat.h>
#include <program/Program.h>
/**
 * Collects directory entries (and the file inodes inlined into them) for fsck.
 *
 * The request carries a cursor consisting of the hash directory number, the ID of the content
 * directory currently being read and the offsets inside the hash directory and the content
 * directory, plus the maximum number of entries to return. Starting at that cursor, content
 * directories are listed one after another until either maxOutEntries names were read or
 * getNextContDirID() reports no further directory. The response contains the newly discovered
 * content directories, the dir entries, the inlined file inodes and the updated cursor.
 *
 * @param ctx response context used to send the RetrieveDirEntriesRespMsg
 * @return always true
 */
bool RetrieveDirEntriesMsgEx::processIncoming(ResponseContext& ctx)
{
LogContext log("Incoming RetrieveDirEntriesMsg");
unsigned hashDirNum = getHashDirNum();
std::string currentContDirID = getCurrentContDirID();
unsigned maxOutEntries = getMaxOutEntries();
int64_t lastContDirOffset = getLastContDirOffset();
int64_t lastHashDirOffset = getLastHashDirOffset();
// only written by the callees below; read only after a successful call
int64_t newHashDirOffset;
int64_t newContDirOffset;
FsckContDirList contDirsOutgoing;
FsckDirEntryList dirEntriesOutgoing;
FsckFileInodeList inlinedFileInodesOutgoing;
unsigned readOutEntries = 0;
// for buddy mirrored requests the local buddy group ID is reported instead of the node ID
NumNodeID localNodeNumID = getIsBuddyMirrored()
? NumNodeID(Program::getApp()->getMetaBuddyGroupMapper()->getLocalGroupID())
: Program::getApp()->getLocalNode().getNumID();
MetaStore* metaStore = Program::getApp()->getMetaStore();
MirrorBuddyGroupMapper* bgm = Program::getApp()->getMetaBuddyGroupMapper();
// buddy mirrored request, but this node is the second target of its buddy group or has no
// buddy group (group ID 0) => answer with empty lists and the unchanged cursor
if (getIsBuddyMirrored() &&
(bgm->getLocalBuddyGroup().secondTargetID
== Program::getApp()->getLocalNode().getNumID().val()
|| bgm->getLocalGroupID() == 0))
{
ctx.sendResponse(
RetrieveDirEntriesRespMsg(&contDirsOutgoing, &dirEntriesOutgoing,
&inlinedFileInodesOutgoing, currentContDirID, lastHashDirOffset, lastContDirOffset));
return true;
}
bool hasNext;
// no content directory in the cursor yet => look up the first one
if ( currentContDirID.empty() )
{
hasNext = StorageTkEx::getNextContDirID(hashDirNum, getIsBuddyMirrored(), lastHashDirOffset,
&currentContDirID, &newHashDirOffset);
if ( hasNext )
{
lastHashDirOffset = newHashDirOffset;
// we found a new .cont directory => send it to fsck
FsckContDir contDir(currentContDirID, localNodeNumID, getIsBuddyMirrored());
contDirsOutgoing.push_back(contDir);
}
}
else
hasNext = true;
// one iteration per content directory
while ( hasNext )
{
std::string parentID = currentContDirID;
// number of names that may still be read in this request
unsigned remainingOutNames = maxOutEntries - readOutEntries;
StringList entryNames;
bool parentDirInodeIsTemp = false;
DirInode* parentDirInode = metaStore->referenceDir(parentID, getIsBuddyMirrored(), true);
// it could be, that parentDirInode does not exist
// in fsck we create a temporary inode for this case, so that we can modify the dentry
// hopefully, the inode itself will get fixed later
if ( unlikely(!parentDirInode) )
{
log.log(
Log_NOTICE,
"Could not reference directory. EntryID: " + parentID
+ " => using temporary directory inode ");
// create temporary inode
int mode = S_IFDIR | S_IRWXU;
UInt16Vector stripeTargets;
Raid0Pattern stripePattern(0, stripeTargets, 0);
parentDirInode = new DirInode(parentID, mode, 0, 0,
Program::getApp()->getLocalNode().getNumID(), stripePattern, getIsBuddyMirrored());
parentDirInodeIsTemp = true;
}
// the content directory offset is only advanced if the listing succeeded
if ( parentDirInode->listIncremental(lastContDirOffset, remainingOutNames, &entryNames,
&newContDirOffset) == FhgfsOpsErr_SUCCESS )
{
lastContDirOffset = newContDirOffset;
}
else
{
log.log(Log_WARNING, "Could not list contents of directory. EntryID: " + parentID);
}
// actually process the entries
for ( StringListIter namesIter = entryNames.begin(); namesIter != entryNames.end();
namesIter++ )
{
// path of the dentry file on the underlying file system
std::string filename = MetaStorageTk::getMetaDirEntryPath(
getIsBuddyMirrored()
? Program::getApp()->getBuddyMirrorDentriesPath()->str()
: Program::getApp()->getDentriesPath()->str(), parentID) + "/" + *namesIter;
// create a EntryInfo and put the information into an FsckDirEntry object
EntryInfo entryInfo;
FileInodeStoreData inodeDiskData;
bool hasInlinedInode = false;
// stay 0 if stat() of the dentry file fails
int32_t saveDevice = 0;
uint64_t saveInode = 0;
auto [getEntryRes, isFileOpen] = metaStore->getEntryData(parentDirInode, *namesIter, &entryInfo,
&inodeDiskData);
// FhgfsOpsErr_DYNAMICATTRIBSOUTDATED is also accepted here; in that case the stat data of an
// inlined inode is fetched via MsgHelperStat::stat() further below
if (getEntryRes == FhgfsOpsErr_SUCCESS ||
getEntryRes == FhgfsOpsErr_DYNAMICATTRIBSOUTDATED )
{
DirEntryType entryType = entryInfo.getEntryType();
const std::string& dentryID = entryInfo.getEntryID();
const std::string& dentryName = *namesIter;
NumNodeID dentryOwnerID = entryInfo.getOwnerNodeID();
FsckDirEntryType fsckEntryType = FsckTk::DirEntryTypeToFsckDirEntryType(entryType);
// stat the file to get device and inode information
struct stat statBuf;
int statRes = stat(filename.c_str(), &statBuf);
if (likely(!statRes))
{
saveDevice = statBuf.st_dev;
saveInode = statBuf.st_ino;
}
else
{
log.log(Log_CRITICAL, "Could not stat dir entry file; entryID: " + dentryID
+ ";filename: " + filename);
}
// only file entries flagged as inlined carry their inode inside the dentry
if ( (DirEntryType_ISFILE(entryType)) && (entryInfo.getIsInlined() ) )
{
hasInlinedInode = true;
}
FsckDirEntry fsckDirEntry(dentryID, dentryName, parentID, localNodeNumID,
dentryOwnerID, fsckEntryType, hasInlinedInode, localNodeNumID,
saveDevice, saveInode, entryInfo.getIsBuddyMirrored());
dirEntriesOutgoing.push_back(fsckDirEntry);
}
else
{
log.log(Log_WARNING, "Unable to create dir entry from entry with name " + *namesIter
+ " in directory with ID " + parentID);
}
// now, if the inode data is inlined we create an fsck inode object here
if ( hasInlinedInode )
{
std::string inodeID = inodeDiskData.getEntryID();
int pathInfoFlag;
if (inodeDiskData.getOrigFeature() == FileInodeOrigFeature_TRUE)
pathInfoFlag = PATHINFO_FEATURE_ORIG;
else
pathInfoFlag = PATHINFO_FEATURE_ORIG_UNKNOWN;
unsigned origUID = inodeDiskData.getOrigUID();
std::string origParentEntryID = inodeDiskData.getOrigParentEntryID();
PathInfo pathInfo(origUID, origParentEntryID, pathInfoFlag);
unsigned userID;
unsigned groupID;
int64_t fileSize;
unsigned numHardLinks;
uint64_t numBlocks;
// points either into inodeDiskData or at updatedStatData; NULL if no stat data is available
StatData* statData;
StatData updatedStatData;
if (getEntryRes == FhgfsOpsErr_SUCCESS)
statData = inodeDiskData.getInodeStatData();
else
{
FhgfsOpsErr statRes = MsgHelperStat::stat(&entryInfo, true, getMsgHeaderUserID(),
updatedStatData);
if (statRes == FhgfsOpsErr_SUCCESS)
statData = &updatedStatData;
else
statData = NULL;
}
if ( statData )
{
userID = statData->getUserID();
groupID = statData->getGroupID();
fileSize = statData->getFileSize();
numHardLinks = statData->getNumHardlinks();
numBlocks = statData->getNumBlocks();
}
else
{
// NOTE(review): the logged system error string is whatever errno currently holds; it is not
// derived from statRes - confirm that this is intended
log.logErr(std::string("Unable to get stat data of inlined file inode: ") + inodeID
+ ". SysErr: " + System::getErrString());
// report the inode with zeroed attributes instead of dropping it
userID = 0;
groupID = 0;
fileSize = 0;
numHardLinks = 0;
numBlocks = 0;
}
UInt16Vector stripeTargets;
unsigned chunkSize;
FsckStripePatternType stripePatternType = FsckTk::stripePatternToFsckStripePattern(
inodeDiskData.getPattern(), &chunkSize, &stripeTargets);
// the last argument is true if the mirror flag stored in the inode differs from the one of
// this request
FsckFileInode fileInode(inodeID, parentID, localNodeNumID, pathInfo, userID, groupID,
fileSize, numHardLinks, numBlocks, stripeTargets, stripePatternType, chunkSize,
localNodeNumID, saveInode, saveDevice, true, entryInfo.getIsBuddyMirrored(),
true, inodeDiskData.getIsBuddyMirrored() != getIsBuddyMirrored());
inlinedFileInodesOutgoing.push_back(fileInode);
}
}
// a temporary inode is owned by this function; a referenced one goes back to the store
if ( parentDirInodeIsTemp )
SAFE_DELETE(parentDirInode);
else
metaStore->releaseDir(parentID);
// fewer names than requested means this content directory was read completely
if ( entryNames.size() < remainingOutNames )
{
// directory is at the end => proceed with next
hasNext = StorageTkEx::getNextContDirID(hashDirNum, getIsBuddyMirrored(),
lastHashDirOffset, &currentContDirID, &newHashDirOffset);
if ( hasNext )
{
lastHashDirOffset = newHashDirOffset;
lastContDirOffset = 0;
readOutEntries += entryNames.size();
// we found a new .cont directory => send it to fsck
FsckContDir contDir(currentContDirID, localNodeNumID, getIsBuddyMirrored());
contDirsOutgoing.push_back(contDir);
}
}
else
{
// there are more to come, but we need to exit the loop now, because maxCount is reached
hasNext = false;
}
}
// send the collected data together with the updated cursor
ctx.sendResponse(
RetrieveDirEntriesRespMsg(&contDirsOutgoing, &dirEntriesOutgoing,
&inlinedFileInodesOutgoing, currentContDirID, lastHashDirOffset, lastContDirOffset) );
return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/fsck/RetrieveDirEntriesMsg.h>
#include <common/net/message/fsck/RetrieveDirEntriesRespMsg.h>
// Meta-server side handler for the fsck RetrieveDirEntriesMsg.
// Only the request processing is added here; everything else is inherited from the base message.
class RetrieveDirEntriesMsgEx : public RetrieveDirEntriesMsg
{
public:
// Processes one incoming request and answers through ctx (implemented in the .cpp file).
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,166 @@
#include "RetrieveFsIDsMsgEx.h"
#include <common/storage/striping/Raid0Pattern.h>
#include <common/threading/SafeRWLock.h>
#include <program/Program.h>
/**
 * Collects dentry-by-ID files for fsck.
 *
 * The request carries a cursor consisting of the hash directory number, the ID of the content
 * directory currently being read and the offsets inside the hash directory and the content
 * directory, plus the maximum number of IDs to return. Starting at that cursor, the ID files of
 * one content directory after another are listed until either maxOutIDs IDs were read or
 * getNextContDirID() reports no further directory. The response contains the collected IDs
 * (with device/inode number of each ID file) and the updated cursor.
 *
 * @param ctx response context used to send the RetrieveFsIDsRespMsg
 * @return always true
 */
bool RetrieveFsIDsMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("Incoming RetrieveFsIDsMsg");

   App* app = Program::getApp();
   MetaStore* metaStore = app->getMetaStore();

   unsigned hashDirNum = getHashDirNum();
   bool buddyMirrored = getBuddyMirrored();
   std::string currentContDirID = getCurrentContDirID();
   unsigned maxOutIDs = getMaxOutIDs();
   int64_t lastContDirOffset = getLastContDirOffset();
   int64_t lastHashDirOffset = getLastHashDirOffset();
   int64_t newHashDirOffset; // only read after getNextContDirID() reported success

   FsckFsIDList fsIDsOutgoing;
   unsigned readOutIDs = 0;

   // for buddy mirrored requests the local buddy group ID is reported instead of the node ID
   NumNodeID localNodeNumID = buddyMirrored
      ? NumNodeID(app->getMetaBuddyGroupMapper()->getLocalGroupID())
      : app->getLocalNode().getNumID();

   MirrorBuddyGroupMapper* bgm = app->getMetaBuddyGroupMapper();

   // buddy mirrored request, but this node is the second target of its buddy group or has no
   // buddy group (group ID 0) => answer with an empty list and the unchanged cursor
   if (buddyMirrored &&
      (bgm->getLocalBuddyGroup().secondTargetID == app->getLocalNode().getNumID().val()
         || bgm->getLocalGroupID() == 0))
   {
      ctx.sendResponse(
         RetrieveFsIDsRespMsg(&fsIDsOutgoing, currentContDirID, lastHashDirOffset,
            lastContDirOffset));
      return true;
   }

   bool hasNext;

   // no content directory in the cursor yet => look up the first one
   if ( currentContDirID.empty() )
   {
      hasNext = StorageTkEx::getNextContDirID(hashDirNum, buddyMirrored, lastHashDirOffset,
         &currentContDirID, &newHashDirOffset);
      if ( hasNext )
         lastHashDirOffset = newHashDirOffset;
   }
   else
      hasNext = true;

   // one iteration per content directory
   while ( hasNext )
   {
      std::string parentID = currentContDirID;

      // directory that holds the dentry-by-ID files of parentID
      std::string idPath = MetaStorageTk::getMetaDirEntryIDPath(
         MetaStorageTk::getMetaDirEntryPath(
            buddyMirrored
               ? app->getBuddyMirrorDentriesPath()->str()
               : app->getDentriesPath()->str(),
            parentID));

      bool parentDirInodeIsTemp = false;
      StringList outNames;

      // Start from the current offset. The value is copied back into lastContDirOffset below
      // before listRes is inspected, so it must be well-defined even if the listing fails
      // without writing it (the callee is not visible here, hence the defensive default).
      int64_t outNewServerOffset = lastContDirOffset;
      ListIncExOutArgs outArgs(&outNames, NULL, NULL, NULL, &outNewServerOffset);

      FhgfsOpsErr listRes;
      unsigned remainingOutIDs = maxOutIDs - readOutIDs;

      DirInode* parentDirInode = metaStore->referenceDir(parentID, buddyMirrored, true);

      // it could be, that parentDirInode does not exist
      // in fsck we create a temporary inode for this case, so that we can modify the dentry
      // hopefully, the inode itself will get fixed later
      if ( unlikely(!parentDirInode) )
      {
         log.log(
            Log_NOTICE,
            "Could not reference directory. EntryID: " + parentID
            + " => using temporary directory inode ");

         // create temporary inode
         int mode = S_IFDIR | S_IRWXU;
         UInt16Vector stripeTargets;
         Raid0Pattern stripePattern(0, stripeTargets, 0);
         parentDirInode = new DirInode(parentID, mode, 0, 0,
            app->getLocalNode().getNumID(), stripePattern, buddyMirrored);
         parentDirInodeIsTemp = true;
      }

      listRes = parentDirInode->listIDFilesIncremental(lastContDirOffset, 0, remainingOutIDs,
         outArgs);
      lastContDirOffset = outNewServerOffset;

      // a temporary inode is owned by this function; a referenced one goes back to the store
      if ( parentDirInodeIsTemp )
         SAFE_DELETE(parentDirInode);
      else
         metaStore->releaseDir(parentID);

      if (listRes != FhgfsOpsErr_SUCCESS)
      {
         log.logErr("Could not read dentry-by-ID files; parentID: " + parentID);
      }

      // process entries
      readOutIDs += outNames.size();

      for ( StringListIter iter = outNames.begin(); iter != outNames.end(); ++iter )
      {
         const std::string& id = *iter;
         std::string filename = idPath + "/" + id;

         // stat the file to get device and inode information; both stay 0 if stat() fails
         int saveDevice = 0;
         uint64_t saveInode = 0;

         struct stat statBuf;
         int statRes = stat(filename.c_str(), &statBuf);

         if ( likely(!statRes) )
         {
            saveDevice = statBuf.st_dev;
            saveInode = statBuf.st_ino;
         }
         else
         {
            log.log(Log_CRITICAL,
               "Could not stat ID file; ID: " + id + ";filename: " + filename);
         }

         FsckFsID fsID(id, parentID, localNodeNumID, saveDevice, saveInode, buddyMirrored);
         fsIDsOutgoing.push_back(fsID);
      }

      if ( readOutIDs < maxOutIDs )
      {
         // directory is at the end => proceed with next
         hasNext = StorageTkEx::getNextContDirID(hashDirNum, buddyMirrored, lastHashDirOffset,
            &currentContDirID, &newHashDirOffset);
         if ( hasNext )
         {
            lastHashDirOffset = newHashDirOffset;
            lastContDirOffset = 0;
         }
      }
      else
      {
         // there are more to come, but we need to exit the loop now, because maxCount is reached
         hasNext = false;
      }
   }

   // send the collected IDs together with the updated cursor
   ctx.sendResponse(
      RetrieveFsIDsRespMsg(&fsIDsOutgoing, currentContDirID, lastHashDirOffset,
         lastContDirOffset) );

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/fsck/RetrieveFsIDsMsg.h>
#include <common/net/message/fsck/RetrieveFsIDsRespMsg.h>
// Meta-server side handler for the fsck RetrieveFsIDsMsg.
// Only the request processing is added here; everything else is inherited from the base message.
class RetrieveFsIDsMsgEx : public RetrieveFsIDsMsg
{
public:
// Processes one incoming request and answers through ctx (implemented in the .cpp file).
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,25 @@
#include "RetrieveInodesMsgEx.h"
#include <program/Program.h>
/**
 * Fetches the next batch of inodes of one hash directory for fsck.
 *
 * The meta store is asked for up to getMaxOutInodes() inodes starting at getLastOffset(); the
 * resulting file inodes, dir inodes and the new offset are returned in a RetrieveInodesRespMsg.
 *
 * @param ctx response context used to send the reply
 * @return always true
 */
bool RetrieveInodesMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("Incoming RetrieveInodesMsg");

   MetaStore* store = Program::getApp()->getMetaStore();

   // request parameters
   const unsigned hashDirNum = getHashDirNum();
   const unsigned maxOutInodes = getMaxOutInodes();
   const int64_t lastOffset = getLastOffset();

   // results, filled by the meta store
   FsckFileInodeList fileInodes;
   FsckDirInodeList dirInodes;
   int64_t nextOffset;

   // NOTE(review): the result of getAllInodesIncremental() is not checked; presumably
   // nextOffset is always written by the callee - confirm
   store->getAllInodesIncremental(hashDirNum, lastOffset, maxOutInodes, &dirInodes,
      &fileInodes, &nextOffset, getIsBuddyMirrored());

   ctx.sendResponse(RetrieveInodesRespMsg(&fileInodes, &dirInodes, nextOffset) );

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/fsck/RetrieveInodesMsg.h>
#include <common/net/message/fsck/RetrieveInodesRespMsg.h>
// Meta-server side handler for the fsck RetrieveInodesMsg.
// Only the request processing is added here; everything else is inherited from the base message.
class RetrieveInodesMsgEx : public RetrieveInodesMsg
{
public:
// Processes one incoming request and answers through ctx (implemented in the .cpp file).
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,51 @@
#include "UpdateDirAttribsMsgEx.h"
#include <storage/MetaStore.h>
#include <program/Program.h>
#include <toolkit/BuddyCommTk.h>
/**
 * Refreshes the meta info of every directory inode listed in the request.
 *
 * Buddy mirrored directories are write-locked by ID while they are processed. Directories that
 * cannot be referenced or whose refreshMetaInfo() call does not succeed are returned to the
 * sender in the UpdateDirAttribsRespMsg.
 *
 * @param ctx response context used to send the reply
 * @return always true
 */
bool UpdateDirAttribsMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "UpdateDirAttribsMsg incoming";

   MetaStore* metaStore = Program::getApp()->getMetaStore();
   EntryLockStore* lockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   FsckDirInodeList failedInodes;

   for (auto& fsckDirInode : getInodes())
   {
      const std::string& dirID = fsckDirInode.getID();
      const bool mirrored = fsckDirInode.getIsBuddyMirrored();

      // only mirrored directories are locked; the lock lasts for this iteration
      FileIDLock lock;
      if (mirrored)
         lock = {lockStore, dirID, true};

      DirInode* dirInode = metaStore->referenceDir(dirID, mirrored, true);
      if (!dirInode)
      {
         LogContext(logContext).logErr("Unable to reference directory; ID: " + dirID);
         failedInodes.push_back(fsckDirInode);
         continue;
      }

      // call the updating method, then give the reference back before evaluating the result
      const FhgfsOpsErr refreshRes = dirInode->refreshMetaInfo();
      metaStore->releaseDir(dirID);

      if (refreshRes == FhgfsOpsErr_SUCCESS)
         continue;

      LogContext(logContext).log(Log_WARNING, "Failed to update attributes of directory. "
         "entryID: " + dirID);
      failedInodes.push_back(fsckDirInode);
   }

   ctx.sendResponse(UpdateDirAttribsRespMsg(&failedInodes) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/UpdateDirAttribsMsg.h>
#include <common/net/message/fsck/UpdateDirAttribsRespMsg.h>
// Meta-server side handler for the fsck UpdateDirAttribsMsg.
// Only the request processing is added here; everything else is inherited from the base message.
class UpdateDirAttribsMsgEx : public UpdateDirAttribsMsg
{
public:
// Processes one incoming request and answers through ctx (implemented in the .cpp file).
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,62 @@
#include "UpdateFileAttribsMsgEx.h"
#include <net/msghelpers/MsgHelperStat.h>
#include <program/Program.h>
#include <toolkit/BuddyCommTk.h>
/**
 * Updates the attributes of every file inode listed in the request.
 *
 * For each inode the hard link count from the request is set and written to disk, then the
 * dynamic attributes are refreshed. Buddy mirrored inodes are write-locked by ID while they are
 * processed. Inodes that cannot be referenced or whose dynamic attribute refresh does not
 * succeed are returned to the sender in the UpdateFileAttribsRespMsg.
 *
 * @param ctx response context used to send the reply
 * @return always true
 */
bool UpdateFileAttribsMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "UpdateFileAttribsMsg incoming";

   MetaStore* metaStore = Program::getApp()->getMetaStore();
   EntryLockStore* lockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   FsckFileInodeList failedInodes;

   for (auto& fsckInode : getInodes())
   {
      const bool mirrored = fsckInode.getIsBuddyMirrored();

      const auto featureFlags =
         (mirrored ? ENTRYINFO_FEATURE_BUDDYMIRRORED : 0) |
         (fsckInode.getIsInlined() ? ENTRYINFO_FEATURE_INLINED : 0);

      // create an EntryInfo object (NOTE: dummy fileName)
      EntryInfo entryInfo(fsckInode.getSaveNodeID(), fsckInode.getParentDirID(),
         fsckInode.getID(), "", DirEntryType_REGULARFILE, featureFlags);

      // only mirrored inodes are locked; the lock lasts for this iteration
      FileIDLock lock;
      if (mirrored)
         lock = {lockStore, entryInfo.getEntryID(), true};

      auto [inode, referenceRes] = metaStore->referenceFile(&entryInfo);

      if (!inode)
      {
         LogContext(logContext).log(Log_WARNING, "Could not reference inode to update attributes. "
            "entryID: " + fsckInode.getID());
         failedInodes.push_back(fsckInode);
         continue;
      }

      inode->setNumHardlinksUnpersistent(fsckInode.getNumHardLinks());
      inode->updateInodeOnDisk(&entryInfo);

      // call the dynamic attribs refresh method
      const FhgfsOpsErr refreshRes = MsgHelperStat::refreshDynAttribs(&entryInfo, true,
         getMsgHeaderUserID() );

      if (refreshRes != FhgfsOpsErr_SUCCESS)
      {
         LogContext(logContext).log(Log_WARNING, "Failed to update dynamic attributes of file. "
            "entryID: " + fsckInode.getID());
         failedInodes.push_back(fsckInode);
      }

      /* only release it here, as refreshDynAttribs() also takes an inode reference and can
       * do the reference from in-memory data then */
      metaStore->releaseFile(entryInfo.getParentEntryID(), inode);
   }

   ctx.sendResponse(UpdateFileAttribsRespMsg(&failedInodes) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/UpdateFileAttribsMsg.h>
#include <common/net/message/fsck/UpdateFileAttribsRespMsg.h>
// Meta-server side handler for the fsck UpdateFileAttribsMsg.
// Only the request processing is added here; everything else is inherited from the base message.
class UpdateFileAttribsMsgEx : public UpdateFileAttribsMsg
{
public:
// Processes one incoming request and answers through ctx (implemented in the .cpp file).
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,39 @@
#include <common/components/worker/queue/MultiWorkQueue.h>
#include <program/Program.h>
#include <session/SessionStore.h>
#include "RequestMetaDataMsgEx.h"
/**
 * Answers a monitoring request with the current state of this meta node.
 *
 * The RequestMetaDataRespMsg contains the node alias, host name, numeric node ID, NIC list,
 * whether this node owns the meta root, the sizes of the indirect and direct work lists, the
 * number of sessions (plain plus mirrored) and the high-resolution stats recorded since the
 * timestamp given in the request. Afterwards the request is counted in the node op stats.
 *
 * @param ctx response context used to send the reply
 * @return always true
 */
bool RequestMetaDataMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("RequestMetaDataMsg incoming");

   App* app = Program::getApp();
   Node& localNode = app->getLocalNode();
   MultiWorkQueue* workQueue = app->getWorkQueue();

   // plain and mirrored sessions are reported as one number
   unsigned sessionCount = app->getSessions()->getSize() + app->getMirroredSessions()->getSize();

   NicAddressList nicList(localNode.getNicList());
   std::string hostnameid = System::getHostname();

   // highresStats: everything recorded since the timestamp sent by the requester
   HighResStatsList statsHistory;
   uint64_t lastStatsMS = getValue();
   app->getStatsCollector()->getStatsSince(lastStatsMS, statsHistory);

   RequestMetaDataRespMsg respMsg(localNode.getAlias(), hostnameid, localNode.getNumID(),
      &nicList, app->getMetaRoot().getID() == localNode.getNumID(),
      workQueue->getIndirectWorkListSize(), workQueue->getDirectWorkListSize(), sessionCount,
      &statsHistory);

   ctx.sendResponse(respMsg);

   LOG_DEBUG_CONTEXT(log, 5, std::string("Sent a message with type: " ) +
      StringTk::uintToStr(respMsg.getMsgType() ) + std::string(" to mon") );

   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), MetaOpCounter_REQUESTMETADATA,
      getMsgHeaderUserID() );

   return true;
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <app/App.h>
#include <common/app/log/LogContext.h>
#include <common/net/message/mon/RequestMetaDataMsg.h>
#include <common/net/message/mon/RequestMetaDataRespMsg.h>
// Meta-server side handler for the monitoring RequestMetaDataMsg.
// Only the request processing is added here; everything else is inherited from the base message.
class RequestMetaDataMsgEx : public RequestMetaDataMsg
{
public:
// Processes one incoming request and answers through ctx (implemented in the .cpp file).
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,900 @@
#include <common/net/message/nodes/GenericDebugRespMsg.h>
#include <common/net/msghelpers/MsgHelperGenericDebug.h>
#include <common/storage/quota/Quota.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include <session/SessionStore.h>
#include "GenericDebugMsgEx.h"
// Operation keywords understood by GenericDebugMsgEx::processCommand(). The first
// space-separated token of the command string is compared against these names.
// Further GENDBGMSG_OP_* keywords used by processCommand() are not defined in this file
// (presumably they come from MsgHelperGenericDebug.h - confirm).

// lock listings for a single file
#define GENDBGMSG_OP_LISTFILEAPPENDLOCKS "listfileappendlocks"
#define GENDBGMSG_OP_LISTFILEENTRYLOCKS "listfileentrylocks"
#define GENDBGMSG_OP_LISTFILERANGELOCKS "listfilerangelocks"
// session / store statistics and general information
#define GENDBGMSG_OP_LISTOPENFILES "listopenfiles"
#define GENDBGMSG_OP_REFERENCESTATISTICS "refstats"
#define GENDBGMSG_OP_CACHESTATISTICS "cachestats"
#define GENDBGMSG_OP_VERSION "version"
#define GENDBGMSG_OP_MSGQUEUESTATS "msgqueuestats"
#define GENDBGMSG_OP_LISTPOOLS "listpools"
// metadata dumps
#define GENDBGMSG_OP_DUMPDENTRY "dumpdentry"
#define GENDBGMSG_OP_DUMPINODE "dumpinode"
#define GENDBGMSG_OP_DUMPINLINEDINODE "dumpinlinedinode"
// write operations are only available when BEEGFS_DEBUG is defined
#ifdef BEEGFS_DEBUG
#define GENDBGMSG_OP_WRITEDIRDENTRY "writedirdentry"
#define GENDBGMSG_OP_WRITEDIRINODE "writedirinode"
#define GENDBGMSG_OP_WRITEFILEINODE "writefileinode"
#endif // BEEGFS_DEBUG
/**
 * Executes the debug command contained in the message and sends the textual result back in a
 * GenericDebugRespMsg.
 *
 * @param ctx response context used to send the reply
 * @return always true
 */
bool GenericDebugMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("GenericDebugMsg incoming");

   LOG_DEBUG_CONTEXT(log, 5, std::string("Command string: ") + getCommandStr() );

   // keep the result in a named string so the c_str() pointer stays valid while sending
   const std::string commandResult = processCommand();

   ctx.sendResponse(GenericDebugRespMsg(commandResult.c_str() ) );

   return true;
}
/**
 * Dispatches the command string of this message to the matching operation handler.
 *
 * The first space-separated token of the command string selects the operation; the rest of the
 * stream is handed to the handler as its arguments.
 *
 * @return command response string; "Unknown/invalid operation" if no handler matches
 */
std::string GenericDebugMsgEx::processCommand()
{
   App* app = Program::getApp();
   Config* cfg = app->getConfig();

   // load command string into a stream to allow us to use getline
   std::istringstream commandStream(getCommandStr() );

   // get operation type from command string
   std::string operation;
   std::getline(commandStream, operation, ' ');

   // operations implemented by this class
   if (operation == GENDBGMSG_OP_LISTFILEAPPENDLOCKS)
      return processOpListFileAppendLocks(commandStream);

   if (operation == GENDBGMSG_OP_LISTFILEENTRYLOCKS)
      return processOpListFileEntryLocks(commandStream);

   if (operation == GENDBGMSG_OP_LISTFILERANGELOCKS)
      return processOpListFileRangeLocks(commandStream);

   if (operation == GENDBGMSG_OP_LISTOPENFILES)
      return processOpListOpenFiles(commandStream);

   if (operation == GENDBGMSG_OP_REFERENCESTATISTICS)
      return processOpReferenceStatistics(commandStream);

   if (operation == GENDBGMSG_OP_CACHESTATISTICS)
      return processOpCacheStatistics(commandStream);

   if (operation == GENDBGMSG_OP_VERSION)
      return processOpVersion(commandStream);

   if (operation == GENDBGMSG_OP_MSGQUEUESTATS)
      return processOpMsgQueueStats(commandStream);

   // operations implemented by the shared helper
   if (operation == GENDBGMSG_OP_VARLOGMESSAGES)
      return MsgHelperGenericDebug::processOpVarLogMessages(commandStream);

   if (operation == GENDBGMSG_OP_VARLOGKERNLOG)
      return MsgHelperGenericDebug::processOpVarLogKernLog(commandStream);

   if (operation == GENDBGMSG_OP_FHGFSLOG)
      return MsgHelperGenericDebug::processOpFhgfsLog(commandStream);

   if (operation == GENDBGMSG_OP_LOADAVG)
      return MsgHelperGenericDebug::processOpLoadAvg(commandStream);

   if (operation == GENDBGMSG_OP_DROPCACHES)
      return MsgHelperGenericDebug::processOpDropCaches(commandStream);

   if (operation == GENDBGMSG_OP_GETCFG)
      return MsgHelperGenericDebug::processOpCfgFile(commandStream, cfg->getCfgFile() );

   if (operation == GENDBGMSG_OP_GETLOGLEVEL)
      return MsgHelperGenericDebug::processOpGetLogLevel(commandStream);

   if (operation == GENDBGMSG_OP_SETLOGLEVEL)
      return MsgHelperGenericDebug::processOpSetLogLevel(commandStream);

   if (operation == GENDBGMSG_OP_NETOUT)
      return MsgHelperGenericDebug::processOpNetOut(commandStream,
         app->getMgmtNodes(), app->getMetaNodes(), app->getStorageNodes() );

   if (operation == GENDBGMSG_OP_LISTMETASTATES)
      return MsgHelperGenericDebug::processOpListTargetStates(commandStream,
         app->getMetaStateStore() );

   if (operation == GENDBGMSG_OP_LISTSTORAGESTATES)
      return MsgHelperGenericDebug::processOpListTargetStates(commandStream,
         app->getTargetStateStore() );

   if (operation == GENDBGMSG_OP_LISTPOOLS)
      return processOpListPools(commandStream);

   if (operation == GENDBGMSG_OP_DUMPDENTRY)
      return processOpDumpDentry(commandStream);

   if (operation == GENDBGMSG_OP_DUMPINODE)
      return processOpDumpInode(commandStream);

   if (operation == GENDBGMSG_OP_DUMPINLINEDINODE)
      return processOpDumpInlinedInode(commandStream);

   if (operation == GENDBGMSG_OP_QUOTAEXCEEDED)
      return processOpQuotaExceeded(commandStream);

   if (operation == GENDBGMSG_OP_LISTSTORAGEPOOLS)
      return MsgHelperGenericDebug::processOpListStoragePools(commandStream,
         app->getStoragePoolStore());

#ifdef BEEGFS_DEBUG
   // write operations exist in debug builds only
   if (operation == GENDBGMSG_OP_WRITEDIRDENTRY)
      return processOpWriteDirDentry(commandStream);

   if (operation == GENDBGMSG_OP_WRITEDIRINODE)
      return processOpWriteDirInode(commandStream);

   if (operation == GENDBGMSG_OP_WRITEFILEINODE)
      return processOpWriteInlinedFileInode(commandStream);
#endif // BEEGFS_DEBUG

   return "Unknown/invalid operation";
}
/**
 * Retrieve append lock stats for a certain file.
 *
 * Protocol: "<parentEntryID> <entryID> [<isBuddyMirrored>]"; a missing isBuddyMirrored
 * argument is treated as false.
 *
 * @param commandStream remaining command arguments
 * @return the append locks of the file as string, or an error text if an argument is missing
 *    or the file cannot be referenced
 */
std::string GenericDebugMsgEx::processOpListFileAppendLocks(std::istringstream& commandStream)
{
   std::string parentEntryID;
   std::string entryID;
   std::string mirroredArg;

   // read all arguments first, validate afterwards
   std::getline(commandStream, parentEntryID, ' ');
   std::getline(commandStream, entryID, ' ');
   std::getline(commandStream, mirroredArg, ' ');

   if (parentEntryID.empty() )
      return "Invalid or missing parentEntryID";

   if (entryID.empty() )
      return "Invalid or missing entryID";

   const bool isBuddyMirrored = !mirroredArg.empty() && StringTk::strToBool(mirroredArg);

   MetaStore* metaStore = Program::getApp()->getMetaStore();

   MetaFileHandle inode = metaStore->referenceLoadedFile(parentEntryID, isBuddyMirrored, entryID);
   if (!inode)
      return "FileID not exists: " + entryID;

   std::string lockList = inode->flockAppendGetAllAsStr();

   metaStore->releaseFile(parentEntryID, inode);

   return lockList;
}
/**
 * Retrieve entry lock stats for a certain file.
 *
 * Protocol: "<parentEntryID> <entryID> [<isBuddyMirrored>]"; a missing isBuddyMirrored
 * argument is treated as false.
 *
 * @param commandStream remaining command arguments
 * @return the entry locks of the file as string, or an error text if an argument is missing
 *    or the file cannot be referenced
 */
std::string GenericDebugMsgEx::processOpListFileEntryLocks(std::istringstream& commandStream)
{
   std::string parentEntryID;
   std::string entryID;
   std::string mirroredArg;

   // read all arguments first, validate afterwards
   std::getline(commandStream, parentEntryID, ' ');
   std::getline(commandStream, entryID, ' ');
   std::getline(commandStream, mirroredArg, ' ');

   if (parentEntryID.empty() )
      return "Invalid or missing parentEntryID";

   if (entryID.empty() )
      return "Invalid or missing entryID";

   const bool isBuddyMirrored = !mirroredArg.empty() && StringTk::strToBool(mirroredArg);

   MetaStore* metaStore = Program::getApp()->getMetaStore();

   MetaFileHandle inode = metaStore->referenceLoadedFile(parentEntryID, isBuddyMirrored, entryID);
   if (!inode)
      return "FileID not exists: " + entryID;

   std::string lockList = inode->flockEntryGetAllAsStr();

   metaStore->releaseFile(parentEntryID, inode);

   return lockList;
}
/**
 * Retrieve range lock stats for a certain file.
 *
 * Protocol: "<parentEntryID> <entryID> [<isBuddyMirrored>]"; a missing isBuddyMirrored
 * argument is treated as false.
 *
 * @param commandStream remaining command arguments
 * @return the range locks of the file as string, or an error text if an argument is missing
 *    or the file cannot be referenced
 */
std::string GenericDebugMsgEx::processOpListFileRangeLocks(std::istringstream& commandStream)
{
   // each argument is validated right after it was read

   std::string parentEntryID;
   std::getline(commandStream, parentEntryID, ' ');
   if (parentEntryID.empty() )
      return "Invalid or missing parentEntryID";

   std::string entryID;
   std::getline(commandStream, entryID, ' ');
   if (entryID.empty() )
      return "Invalid or missing entryID";

   std::string mirroredArg;
   std::getline(commandStream, mirroredArg, ' ');
   const bool isBuddyMirrored = !mirroredArg.empty() && StringTk::strToBool(mirroredArg);

   MetaStore* metaStore = Program::getApp()->getMetaStore();

   MetaFileHandle file = metaStore->referenceLoadedFile(parentEntryID, isBuddyMirrored, entryID);
   if (!file)
      return "FileID not found: " + entryID;

   std::string lockList = file->flockRangeGetAllAsStr();

   metaStore->releaseFile(parentEntryID, file);

   return lockList;
}
std::string GenericDebugMsgEx::processOpListOpenFiles(std::istringstream& commandStream)
{
// protocol: no arguments
App* app = Program::getApp();
SessionStore* sessions = app->getSessions();
SessionStore* mirroredSessions = app->getMirroredSessions();
std::ostringstream responseStream;
size_t numFilesTotal = 0;
size_t numCheckedSessions = 0; // may defer from number of initially queried sessions
NumNodeIDList sessionIDs = sessions->getAllSessionIDs();
NumNodeIDList mirroredSessionIDs = mirroredSessions->getAllSessionIDs();
responseStream << "Found " << sessionIDs.size() << " non-mirrored sessions and "
<< mirroredSessionIDs.size() << " mirrored sessions." << std::endl;
responseStream << std::endl;
responseStream << "Non-mirrored sessions:" << std::endl;
// walk over all sessions
for(NumNodeIDListCIter iter = sessionIDs.begin(); iter != sessionIDs.end(); iter++)
{
Session* session = sessions->referenceSession(*iter, false);
// note: sessionID might have been removed since we queried it, e.g. because client is gone
if(!session)
continue;
numCheckedSessions++;
SessionFileStore* sessionFiles = session->getFiles();
size_t numFiles = sessionFiles->getSize();
sessions->releaseSession(session);
if(!numFiles)
continue; // only print sessions with open files
numFilesTotal += numFiles;
responseStream << *iter << ": " << numFiles << std::endl;
}
responseStream << "Mirrored sessions:" << std::endl;
// ...and the mirrored sessions
for(NumNodeIDListCIter iter = mirroredSessionIDs.begin(); iter != mirroredSessionIDs.end();
++iter)
{
Session* session = mirroredSessions->referenceSession(*iter, false);
if (!session)
continue;
numCheckedSessions++;
SessionFileStore* sessionFiles = session->getFiles();
size_t numFiles = sessionFiles->getSize();
mirroredSessions->releaseSession(session);
if (!numFiles)
continue;
numFilesTotal += numFiles;
responseStream << *iter << ": " << numFiles << std::endl;
}
responseStream << std::endl;
responseStream << "Final results: " << numFilesTotal << " open files in " <<
numCheckedSessions << " checked sessions";
return responseStream.str();
}
/**
 * Print the directory and file counts reported by MetaStore::getReferenceStats().
 *
 * Protocol: no arguments.
 */
std::string GenericDebugMsgEx::processOpReferenceStatistics(std::istringstream& commandStream)
{
   size_t referencedDirs;
   size_t referencedFiles;

   Program::getApp()->getMetaStore()->getReferenceStats(&referencedDirs, &referencedFiles);

   std::ostringstream out;
   out << "Dirs: " << referencedDirs << std::endl;
   out << "Files: " << referencedFiles;

   return out.str();
}
/**
 * Print the directory count reported by MetaStore::getCacheStats().
 *
 * Protocol: no arguments.
 */
std::string GenericDebugMsgEx::processOpCacheStatistics(std::istringstream& commandStream)
{
   size_t cachedDirs;

   Program::getApp()->getMetaStore()->getCacheStats(&cachedDirs);

   std::ostringstream out;
   out << "Dirs: " << cachedDirs;

   return out.str();
}
/**
 * Return the value of the BEEGFS_VERSION macro as the response string.
 *
 * Protocol: no arguments.
 */
std::string GenericDebugMsgEx::processOpVersion(std::istringstream& commandStream)
{
   std::string version(BEEGFS_VERSION);

   return version;
}
std::string GenericDebugMsgEx::processOpMsgQueueStats(std::istringstream& commandStream)
{
// protocol: no arguments
App* app = Program::getApp();
MultiWorkQueue* workQ = app->getWorkQueue();
std::ostringstream responseStream;
std::string indirectQueueStats;
std::string directQueueStats;
std::string busyStats;
workQ->getStatsAsStr(indirectQueueStats, directQueueStats, busyStats);
responseStream << "general queue stats: " << std::endl <<
indirectQueueStats << std::endl;
responseStream << "direct queue stats: " << std::endl <<
directQueueStats << std::endl;
responseStream << "busy worker stats: " << std::endl <<
busyStats << std::endl;
return responseStream.str();
}
/**
* List internal state of meta and storage capacity pools.
*/
std::string GenericDebugMsgEx::processOpListPools(std::istringstream& commandStream)
{
// protocol: no arguments
const App* app = Program::getApp();
const NodeCapacityPools* metaPools = app->getMetaCapacityPools();
std::ostringstream responseStream;
responseStream << "META POOLS STATE: " << std::endl << metaPools->getStateAsStr() << std::endl;
const StoragePoolPtrVec storagePools = app->getStoragePoolStore()->getPoolsAsVec();
for (auto iter = storagePools.begin(); iter != storagePools.end(); iter++)
{
const TargetCapacityPools* capPool = (*iter)->getTargetCapacityPools();
responseStream << "STORAGE CAPACITY POOLS STATE (STORAGE POOL ID: " << (*iter)->getId()
<< "): " << std::endl << capPool->getStateAsStr() << std::endl;
}
for (auto iter = storagePools.begin(); iter != storagePools.end(); iter++)
{
const NodeCapacityPools* capPool = (*iter)->getBuddyCapacityPools();
responseStream << "STORAGE BUDDY CAPACITY POOLS STATE (STORAGE POOL ID: "
<< (*iter)->getId() << "): " << std::endl << capPool->getStateAsStr()
<< std::endl;
}
return responseStream.str();
}
std::string GenericDebugMsgEx::processOpDumpDentry(std::istringstream& commandStream)
{
MetaStore* metaStore = Program::getApp()->getMetaStore();
std::ostringstream responseStream;
StringList parameterList;
StringTk::explode(commandStream.str(), ' ', &parameterList);
if ( parameterList.size() < 3 || parameterList.size() > 4 )
return "Invalid or missing parameters; Parameter format: parentDirID entryName "
"[isBuddyMirrored]";
StringListIter iter = parameterList.begin();
iter++;
std::string parentDirID = *iter;
iter++;
std::string entryName = *iter;
iter++;
bool isBuddyMirrored = false;
if (iter != parameterList.end())
{
isBuddyMirrored = StringTk::strToBool(*iter);
}
DirInode* parentDirInode = metaStore->referenceDir(parentDirID, isBuddyMirrored, false);
if (!parentDirInode)
return "Unable to reference parent directory.";
DirEntry dentry(entryName);
bool getDentryRes = parentDirInode->getDentry(entryName, dentry);
metaStore->releaseDir(parentDirID);
if (!getDentryRes)
return "Unable to get dentry from parent directory.";
responseStream << "entryType: " << dentry.getEntryType() << std::endl;
responseStream << "ID: " << dentry.getID() << std::endl;
responseStream << "ownerNodeID: " << dentry.getOwnerNodeID() << std::endl;
responseStream << "featureFlags: " << dentry.getDentryFeatureFlags() << std::endl;
return responseStream.str();
}
#ifdef BEEGFS_DEBUG
/**
 * Overwrite the owner node ID stored in a directory entry (debug builds only).
 *
 * Protocol: parentDirID entryName ownerNodeID [isBuddyMirrored]
 *
 * @param commandStream the complete command; its first space-separated token is skipped
 *    (presumably the command name - confirm against processCommand() )
 * @return "OK" on success, otherwise a human-readable error message
 */
std::string GenericDebugMsgEx::processOpWriteDirDentry(std::istringstream& commandStream)
{
   MetaStore* metaStore = Program::getApp()->getMetaStore();
   std::string dentriesPath = Program::getApp()->getDentriesPath()->str();

   StringList parameterList;
   StringTk::explode(commandStream.str(), ' ', &parameterList);

   // leading token + three mandatory arguments + one optional argument
   if ( parameterList.size() < 4 || parameterList.size() > 5 )
      return "Invalid or missing parameters; Parameter format: parentDirID entryName ownerNodeID "
         "[isBuddyMirrored]";

   StringListIter iter = parameterList.begin();

   ++iter; // skip the leading token
   std::string parentDirID = *iter;

   ++iter;
   std::string entryName = *iter;

   ++iter;
   NumNodeID ownerNodeID(StringTk::strToUInt(*iter) );

   ++iter;
   bool isBuddyMirrored = false;
   if (iter != parameterList.end() )
      isBuddyMirrored = StringTk::strToBool(*iter);

   DirInode* parentDirInode = metaStore->referenceDir(parentDirID, isBuddyMirrored, true);
   if (!parentDirInode)
      return "Unable to reference parent directory.";

   // load the current dentry into a local object; the parent reference is dropped right after
   DirEntry dentry(entryName);
   bool getDentryRes = parentDirInode->getDentry(entryName, dentry);
   metaStore->releaseDir(parentDirID);

   if (!getDentryRes)
      return "Unable to get dentry from parent directory.";

   bool setOwnerRes = dentry.setOwnerNodeID(
      MetaStorageTk::getMetaDirEntryPath(dentriesPath, parentDirID), ownerNodeID);
   if (!setOwnerRes)
      return "Unable to set new owner node ID in dentry.";

   return "OK";
}
#endif // BEEGFS_DEBUG
std::string GenericDebugMsgEx::processOpDumpInode(std::istringstream& commandStream)
{
// commandStream: ID of inode
MetaStore* metaStore = Program::getApp()->getMetaStore();
std::ostringstream responseStream;
std::string inodeID;
std::string isBuddyMirroredStr;
bool isBuddyMirrored;
// get inodeID from command string
std::getline(commandStream, inodeID, ' ');
if(inodeID.empty() )
return "Invalid or missing inode ID";
// get isBuddyMirrored from command string
std::getline(commandStream, isBuddyMirroredStr, ' ');
if(isBuddyMirroredStr.empty())
isBuddyMirrored = false;
else
isBuddyMirrored = StringTk::strToBool(isBuddyMirroredStr);
MetaFileHandle fileInode;
DirInode* dirInode = NULL;
metaStore->referenceInode(inodeID, isBuddyMirrored, fileInode, dirInode);
if (fileInode)
{
StatData statData;
if (fileInode->getStatData(statData) != FhgfsOpsErr_SUCCESS)
{ // stat data retrieval failed
metaStore->releaseFile("", fileInode);
return "Could not get stat data for requested file inode";
}
DirEntryType dirEntryType = MetadataTk::posixFileTypeToDirEntryType(fileInode->getMode() );
std::string parentDirID = "cannotBeUsed";
uint16_t parentNodeID = 0;
responseStream << "entryType: " << dirEntryType << std::endl;
responseStream << "parentEntryID: " << parentDirID << std::endl;
responseStream << "parentNodeID: " << StringTk::uintToStr(parentNodeID) << std::endl;
responseStream << "mode: " << StringTk::intToStr(statData.getMode()) << std::endl;
responseStream << "uid: " << StringTk::uintToStr(statData.getUserID()) << std::endl;
responseStream << "gid: " << StringTk::uintToStr(statData.getGroupID()) << std::endl;
responseStream << "filesize: " << StringTk::int64ToStr(statData.getFileSize()) << std::endl;
responseStream << "ctime: " << StringTk::int64ToStr(statData.getCreationTimeSecs()) << std::endl;
responseStream << "atime: " << StringTk::int64ToStr(statData.getLastAccessTimeSecs()) << std::endl;
responseStream << "mtime: " << StringTk::int64ToStr(statData.getModificationTimeSecs()) << std::endl;
responseStream << "hardlinks: " << StringTk::intToStr(statData.getNumHardlinks()) << std::endl;
responseStream << "stripeTargets: "
<< StringTk::uint16VecToStr(fileInode->getStripePattern()->getStripeTargetIDs())
<< std::endl;
responseStream << "chunkSize: "
<< StringTk::uintToStr(fileInode->getStripePattern()->getChunkSize()) << std::endl;
responseStream << "featureFlags: " << fileInode->getFeatureFlags() << std::endl;
metaStore->releaseFile("", fileInode);
}
else
if (dirInode)
{
StatData statData;
if (dirInode->getStatData(statData) != FhgfsOpsErr_SUCCESS)
{ // stat data retrieval failed
metaStore->releaseDir(inodeID);
return "Could not get stat data for requested dir inode";
}
DirEntryType dirEntryType = MetadataTk::posixFileTypeToDirEntryType(dirInode->getMode() );
std::string parentDirID;
NumNodeID parentNodeID;
dirInode->getParentInfo(&parentDirID, &parentNodeID);
responseStream << "entryType: " << dirEntryType << std::endl;
responseStream << "parentEntryID: " << parentDirID << std::endl;
responseStream << "parentNodeID: " << parentNodeID.str() << std::endl;
responseStream << "ownerNodeID: " << dirInode->getOwnerNodeID().str() << std::endl;
responseStream << "mode: " << StringTk::intToStr(statData.getMode()) << std::endl;
responseStream << "uid: " << StringTk::uintToStr(statData.getUserID()) << std::endl;
responseStream << "gid: " << StringTk::uintToStr(statData.getGroupID()) << std::endl;
responseStream << "size: " << StringTk::int64ToStr(statData.getFileSize()) << std::endl;
responseStream << "numLinks: " << StringTk::int64ToStr(statData.getNumHardlinks())
<< std::endl;
responseStream << "ctime: " << StringTk::int64ToStr(statData.getCreationTimeSecs())
<< std::endl;
responseStream << "atime: " << StringTk::int64ToStr(statData.getLastAccessTimeSecs())
<< std::endl;
responseStream << "mtime: " << StringTk::int64ToStr(statData.getModificationTimeSecs())
<< std::endl;
responseStream << "featureFlags: " << dirInode->getFeatureFlags() << std::endl;
metaStore->releaseDir(inodeID);
}
else
{
return "Could not read requested inode";
}
return responseStream.str();
}
/**
 * Dump the inode data that is stored inlined in a directory entry.
 *
 * Protocol: parentDirID entryName [isBuddyMirrored]
 *
 * @param commandStream the complete command; its first space-separated token is skipped
 *    (presumably the command name - confirm against processCommand() )
 * @return the dump, or a human-readable error message
 */
std::string GenericDebugMsgEx::processOpDumpInlinedInode(std::istringstream& commandStream)
{
   MetaStore* metaStore = Program::getApp()->getMetaStore();

   std::ostringstream responseStream;

   StringList parameterList;
   StringTk::explode(commandStream.str(), ' ', &parameterList);

   // leading token + two mandatory arguments + one optional argument
   if ( parameterList.size() < 3 || parameterList.size() > 4 )
      return "Invalid or missing parameters; Parameter format: parentDirID entryName "
         "[isBuddyMirrored]";

   StringListIter iter = parameterList.begin();

   ++iter; // skip the leading token
   std::string parentEntryID = *iter;

   ++iter;
   std::string entryName = *iter;

   ++iter;
   bool isBuddyMirrored = false;
   if (iter != parameterList.end() )
      isBuddyMirrored = StringTk::strToBool(*iter);

   DirInode* parentInode = metaStore->referenceDir(parentEntryID, isBuddyMirrored, false);
   if ( !parentInode )
      return "Could not open parent directory";

   // note: every return path below must release the parent directory reference again

   DirEntry dirEntry(entryName);
   bool getDentryRes = parentInode->getDentry(entryName, dirEntry);
   if ( !getDentryRes )
   {
      metaStore->releaseDir(parentEntryID);
      return "Could not open dir entry";
   }

   FileInodeStoreData* inodeData = dirEntry.getInodeStoreData();
   if ( !inodeData )
   {
      metaStore->releaseDir(parentEntryID);
      return "Could not get inlined inode data";
   }

   StatData* statData = inodeData->getInodeStatData();
   if ( !statData )
   {
      metaStore->releaseDir(parentEntryID);
      return "Could not get stat data for requested file inode";
   }

   responseStream << "entryID: " << inodeData->getEntryID() << std::endl;
   responseStream << "mode: " << StringTk::intToStr(statData->getMode() ) << std::endl;
   responseStream << "uid: " << StringTk::uintToStr(statData->getUserID() ) << std::endl;
   responseStream << "gid: " << StringTk::uintToStr(statData->getGroupID() ) << std::endl;
   responseStream << "filesize: " << StringTk::int64ToStr(statData->getFileSize() ) << std::endl;
   responseStream << "ctime: " << StringTk::int64ToStr(statData->getCreationTimeSecs() )
      << std::endl;
   responseStream << "atime: " << StringTk::int64ToStr(statData->getLastAccessTimeSecs() )
      << std::endl;
   responseStream << "mtime: " << StringTk::int64ToStr(statData->getModificationTimeSecs() )
      << std::endl;
   responseStream << "hardlinks: " << StringTk::intToStr(statData->getNumHardlinks() )
      << std::endl;
   responseStream << "stripeTargets: "
      << StringTk::uint16VecToStr(inodeData->getPattern()->getStripeTargetIDs() ) << std::endl;
   responseStream << "chunkSize: "
      << StringTk::uintToStr(inodeData->getPattern()->getChunkSize() ) << std::endl;
   responseStream << "featureFlags: " << inodeData->getInodeFeatureFlags() << std::endl;

   metaStore->releaseDir(parentEntryID);

   return responseStream.str();
}
/**
 * List the IDs with exceeded quota for one target.
 *
 * Protocol: targetID followed by the arguments consumed by
 * MsgHelperGenericDebug::processOpQuotaExceeded().
 *
 * @param commandStream remaining command arguments; the first token (targetID) is consumed
 *    here, the rest of the stream is handed on to the helper
 * @return the helper's response, or a notice if quota enforcement is disabled in the config
 */
std::string GenericDebugMsgEx::processOpQuotaExceeded(std::istringstream& commandStream)
{
   App* app = Program::getApp();

   // the target ID must be read from the stream before the helper parses the remainder
   std::string targetIdStr;
   std::getline(commandStream, targetIdStr, ' ');
   uint16_t targetId = StringTk::strToUInt(targetIdStr);

   // adjacent literals are concatenated verbatim, so the separating space has to be part of
   // the first literal
   if(!app->getConfig()->getQuotaEnableEnforcement() )
      return "No quota exceeded IDs on this storage daemon because quota enforcement is "
         "disabled.";

   ExceededQuotaStorePtr exQuotaStore = app->getExceededQuotaStores()->get(targetId);

   // exQuotaStore may be null; needs to be checked in
   // MsgHelperGenericDebug::processOpQuotaExceeded
   return MsgHelperGenericDebug::processOpQuotaExceeded(commandStream, exQuotaStore.get() );
}
#ifdef BEEGFS_DEBUG
std::string GenericDebugMsgEx::processOpWriteDirInode(std::istringstream& commandStream)
{
MetaStore* metaStore = Program::getApp()->getMetaStore();
// get parameters from command string
StringVector paramVec;
StringTk::explode(commandStream.str(), ' ', &paramVec);
std::string entryID;
std::string parentDirID;
NumNodeID parentNodeID;
NumNodeID ownerNodeID;
int mode;
uint uid;
uint gid;
int64_t size;
unsigned numLinks;
bool isBuddyMirrored;
try
{
unsigned i = 1;
entryID = paramVec.at(i++);
parentDirID = paramVec.at(i++);
parentNodeID = NumNodeID(StringTk::strToUInt(paramVec.at(i++)));
ownerNodeID = NumNodeID(StringTk::strToUInt(paramVec.at(i++)));
mode = StringTk::strToInt(paramVec.at(i++));
uid = StringTk::strToUInt(paramVec.at(i++));
gid = StringTk::strToUInt(paramVec.at(i++));
size = StringTk::strToInt64(paramVec.at(i++));
numLinks = StringTk::strToUInt(paramVec.at(i++));
if (i<paramVec.size())
isBuddyMirrored = StringTk::strToBool(paramVec.at(i));
else
isBuddyMirrored = false;
}
catch (std::out_of_range& e)
{
std::string paramFormatStr =
"entryID parentDirID parentNodeID ownerNodeID mode uid gid size numLinks "
"[isBuddyMirrored]";
return "Invalid or missing parameters; Parameter format: " + paramFormatStr;
}
DirInode* dirInode = metaStore->referenceDir(entryID, isBuddyMirrored, true);
if ( !dirInode )
return "Could not find directory with ID: " + entryID;
StatData statData;
if ( dirInode->getStatData(statData) != FhgfsOpsErr_SUCCESS )
{
metaStore->releaseDir(entryID);
return "Could not get stat data for requested dir inode";
}
dirInode->setParentInfoInitial(parentDirID, parentNodeID);
dirInode->setOwnerNodeID(ownerNodeID);
statData.setMode(mode);
statData.setUserID(uid);
statData.setGroupID(gid);
statData.setFileSize(size);
statData.setNumHardLinks(numLinks);
dirInode->setStatData(statData);
metaStore->releaseDir(entryID);
return "OK";
}
std::string GenericDebugMsgEx::processOpWriteInlinedFileInode(std::istringstream& commandStream)
{
MetaStore* metaStore = Program::getApp()->getMetaStore();
std::string retStr = "OK";
// get parameters from command string
StringVector paramVec;
StringTk::explode(commandStream.str(), ' ', &paramVec);
std::string parentDirID;
std::string name;
std::string entryID;
int mode;
uint uid;
uint gid;
int64_t filesize;
unsigned numLinks;
UInt16Vector stripeTargets;
// UInt16Vector* origStripeTargets;
try
{
unsigned i = 1;
parentDirID = paramVec.at(i++);
name = paramVec.at(i++);
entryID = paramVec.at(i++);
mode = StringTk::strToInt(paramVec.at(i++));
uid = StringTk::strToUInt(paramVec.at(i++));
gid = StringTk::strToUInt(paramVec.at(i++));
filesize = StringTk::strToInt64(paramVec.at(i++));
numLinks = StringTk::strToUInt(paramVec.at(i++));
StringTk::strToUint16Vec(paramVec.at(i++), &stripeTargets);
}
catch (std::out_of_range& e)
{
std::string paramFormatStr =
"parentDirID entryName entryID mode uid gid filesize numLinks stripeTargets";
return "Invalid or missing parameters; Parameter format: " + paramFormatStr;
}
EntryInfo entryInfo(Program::getApp()->getLocalNodeNumID(), parentDirID, entryID, name,
DirEntryType_REGULARFILE, 0);
auto [fileInode, referenceRes] = metaStore->referenceFile(&entryInfo);
if (!fileInode)
return "Could not reference inode";
StatData statData;
fileInode->getStatData(statData);
statData.setMode(mode);
statData.setUserID(uid);
statData.setGroupID(gid);
statData.setFileSize(filesize);
statData.setNumHardLinks(numLinks);
fileInode->setStatData(statData);
StripePattern* pattern = fileInode->getStripePattern();
UInt16Vector* origTargets = pattern->getStripeTargetIDsModifyable();
*origTargets = stripeTargets;
fileInode->updateInodeOnDisk(&entryInfo, pattern);
metaStore->releaseFile(parentDirID, fileInode);
return retStr;
}
#endif // BEEGFS_DEBUG

View File

@@ -0,0 +1,36 @@
#pragma once
#include <common/net/message/nodes/GenericDebugMsg.h>
#include <common/Common.h>
/**
 * Metadata service handler for GenericDebugMsg.
 *
 * Each processOp* method implements one debug command: it reads its arguments from the given
 * command stream and returns the textual response (or an error description).
 */
class GenericDebugMsgEx : public GenericDebugMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);

   private:
      std::string processCommand();

      // file lock listings
      std::string processOpListFileAppendLocks(std::istringstream& commandStream);
      std::string processOpListFileEntryLocks(std::istringstream& commandStream);
      std::string processOpListFileRangeLocks(std::istringstream& commandStream);

      // sessions, statistics and general service state
      std::string processOpListOpenFiles(std::istringstream& commandStream);
      std::string processOpReferenceStatistics(std::istringstream& commandStream);
      std::string processOpCacheStatistics(std::istringstream& commandStream);
      std::string processOpVersion(std::istringstream& commandStream);
      std::string processOpMsgQueueStats(std::istringstream& commandStream);
      std::string processOpListPools(std::istringstream& commandStream);

      // read-only dumps of metadata objects
      std::string processOpDumpDentry(std::istringstream& commandStream);
      std::string processOpDumpInode(std::istringstream& commandStream);
      std::string processOpDumpInlinedInode(std::istringstream& commandStream);

      std::string processOpQuotaExceeded(std::istringstream& commandStream);

#ifdef BEEGFS_DEBUG
      // commands that modify metadata; only compiled into debug builds
      std::string processOpWriteDirDentry(std::istringstream& commandStream);
      std::string processOpWriteDirInode(std::istringstream& commandStream);
      std::string processOpWriteInlinedFileInode(std::istringstream& commandStream);
#endif // BEEGFS_DEBUG
};

View File

@@ -0,0 +1,32 @@
#include <program/Program.h>
#include <common/net/message/storage/GetHighResStatsRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <common/nodes/OpCounter.h>
#include "GetClientStatsMsgEx.h"
#include <nodes/MetaNodeOpStats.h>
#include <common/net/message/nodes/GetClientStatsRespMsg.h>
/**
 * Server side handling of a GetClientStatsMsg request: the node's op stats are converted into
 * a UInt64 vector and sent back as GetClientStatsRespMsg.
 *
 * @return always true
 */
bool GetClientStatsMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("GetClientStatsMsgEx incoming");

   uint64_t cookieIP = getCookieIP(); // requested is cookie+1

   MetaNodeOpStats* nodeOpStats = Program::getApp()->getNodeOpStats();

   bool perUserStats = isMsgHeaderFeatureFlagSet(GETCLIENTSTATSMSG_FLAG_PERUSERSTATS);

   // fill the response vector
   UInt64Vector statsVec;
   nodeOpStats->mapToUInt64Vec(
      cookieIP, GETCLIENTSTATSRESP_MAX_PAYLOAD_LEN, perUserStats, &statsVec);

   ctx.sendResponse(GetClientStatsRespMsg(&statsVec) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/storage/StorageErrors.h>
#include <common/net/message/nodes/GetClientStatsMsg.h>
/**
 * Metadata service handler for GetClientStatsMsg.
 */
class GetClientStatsMsgEx : public GetClientStatsMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,65 @@
#include <common/net/message/nodes/GetNodeCapacityPoolsRespMsg.h>
#include <common/storage/StoragePool.h>
#include <program/Program.h>
#include "GetNodeCapacityPoolsMsgEx.h"
/**
 * Answer a capacity pools query with the pool lists of the requested kind.
 *
 * Meta pools are stored in the response map under the invalid storage pool ID; storage target
 * pools and storage buddy pools are stored per storage pool ID.
 *
 * @return false (without sending a response) if the requested pool type is unknown, true
 *    otherwise
 */
bool GetNodeCapacityPoolsMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "GetNodeCapacityPools incoming";

   CapacityPoolQueryType poolType = getCapacityPoolQueryType();

   LOG_DEBUG(logContext, Log_SPAM, "PoolType: " + StringTk::intToStr(poolType) );

   const App* app = Program::getApp();

   GetNodeCapacityPoolsRespMsg::PoolsMap poolsByID;

   switch (poolType)
   {
      case CapacityPoolQuery_META:
      {
         const NodeCapacityPools* metaPools = app->getMetaCapacityPools();

         poolsByID[StoragePoolId(StoragePoolStore::INVALID_POOL_ID)] =
            metaPools->getPoolsAsLists();

         break;
      }

      case CapacityPoolQuery_STORAGE:
      {
         const StoragePoolPtrVec storagePools = app->getStoragePoolStore()->getPoolsAsVec();

         for (const auto& pool : storagePools)
         {
            const TargetCapacityPools* targetPools = pool->getTargetCapacityPools();

            poolsByID[pool->getId()] = targetPools->getPoolsAsLists();
         }

         break;
      }

      case CapacityPoolQuery_STORAGEBUDDIES:
      {
         const StoragePoolPtrVec storagePools = app->getStoragePoolStore()->getPoolsAsVec();

         for (const auto& pool : storagePools)
         {
            const NodeCapacityPools* buddyPools = pool->getBuddyCapacityPools();

            poolsByID[pool->getId()] = buddyPools->getPoolsAsLists();
         }

         break;
      }

      default:
      {
         LogContext(logContext).logErr("Invalid pool type: " + StringTk::intToStr(poolType) );

         return false;
      }
   }

   ctx.sendResponse(GetNodeCapacityPoolsRespMsg(&poolsByID) );

   return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/GetNodeCapacityPoolsMsg.h>
/**
 * Metadata service handler for GetNodeCapacityPoolsMsg.
 */
class GetNodeCapacityPoolsMsgEx : public GetNodeCapacityPoolsMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};

Some files were not shown because too many files have changed in this diff Show More