301 lines
11 KiB
C
301 lines
11 KiB
C
|
/*
|
||
|
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
|
||
|
*
|
||
|
* Licensed under the BSD License, see LICENSE file for details.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
#ifndef CPP_INCLUDE_LIBXTREEMFS_ASYNC_WRITE_HANDLER_H_
|
||
|
#define CPP_INCLUDE_LIBXTREEMFS_ASYNC_WRITE_HANDLER_H_
|
||
|
|
||
|
#include <boost/thread/condition.hpp>
|
||
|
#include <boost/thread/mutex.hpp>
|
||
|
#include <list>
|
||
|
|
||
|
#include "libxtreemfs/execute_sync_request.h"
|
||
|
#include "libxtreemfs/options.h"
|
||
|
#include "rpc/callback_interface.h"
|
||
|
#include "util/synchronized_queue.h"
|
||
|
|
||
|
namespace xtreemfs {
|
||
|
|
||
|
struct AsyncWriteBuffer;
|
||
|
class FileInfo;
|
||
|
class UUIDResolver;
|
||
|
class UUIDIterator;
|
||
|
|
||
|
namespace pbrpc {
|
||
|
class OSDServiceClient;
|
||
|
class OSDWriteResponse;
|
||
|
} // namespace pbrpc
|
||
|
|
||
|
class AsyncWriteHandler
|
||
|
: public xtreemfs::rpc::CallbackInterface<
|
||
|
xtreemfs::pbrpc::OSDWriteResponse> {
|
||
|
public:
|
||
|
struct CallbackEntry {
|
||
|
/**
|
||
|
* @remark Ownerships of response_message, data and error are transferred.
|
||
|
*/
|
||
|
CallbackEntry(AsyncWriteHandler* handler,
|
||
|
xtreemfs::pbrpc::OSDWriteResponse* response_message,
|
||
|
char* data,
|
||
|
uint32_t data_length,
|
||
|
xtreemfs::pbrpc::RPCHeader::ErrorResponse* error,
|
||
|
void* context)
|
||
|
: handler_(handler),
|
||
|
response_message_(response_message),
|
||
|
data_(data),
|
||
|
data_length_(data_length),
|
||
|
error_(error),
|
||
|
context_(context) {}
|
||
|
|
||
|
AsyncWriteHandler* handler_;
|
||
|
xtreemfs::pbrpc::OSDWriteResponse* response_message_;
|
||
|
char* data_;
|
||
|
uint32_t data_length_;
|
||
|
xtreemfs::pbrpc::RPCHeader::ErrorResponse* error_;
|
||
|
void* context_;
|
||
|
};
|
||
|
|
||
|
AsyncWriteHandler(
|
||
|
FileInfo* file_info,
|
||
|
UUIDIterator* uuid_iterator,
|
||
|
UUIDResolver* uuid_resolver,
|
||
|
xtreemfs::pbrpc::OSDServiceClient* osd_service_client,
|
||
|
const xtreemfs::pbrpc::Auth& auth_bogus,
|
||
|
const xtreemfs::pbrpc::UserCredentials& user_credentials_bogus,
|
||
|
const Options& volume_options,
|
||
|
util::SynchronizedQueue<CallbackEntry>& callback_queue_);
|
||
|
|
||
|
~AsyncWriteHandler();
|
||
|
|
||
|
/** Adds write_buffer to the list of pending writes and sends it to the OSD
|
||
|
* specified by write_buffer->uuid_iterator (or write_buffer->osd_uuid if
|
||
|
* write_buffer->use_uuid_iterator is false).
|
||
|
*
|
||
|
* Blocks if the number of pending bytes exceeds the maximum write-ahead
|
||
|
* or WaitForPendingWrites{NonBlocking}() was called beforehand.
|
||
|
*/
|
||
|
void Write(AsyncWriteBuffer* write_buffer);
|
||
|
|
||
|
/** Blocks until state changes back to IDLE and prevents allowing new writes.
|
||
|
* by blocking further Write() calls. */
|
||
|
void WaitForPendingWrites();
|
||
|
|
||
|
/** If waiting for pending writes would block, it returns true and adds
|
||
|
* the parameters to the list waiting_observers_ and calls notify_one()
|
||
|
* on condition_variable once state_ changed back to IDLE. */
|
||
|
bool WaitForPendingWritesNonBlocking(boost::condition* condition_variable,
|
||
|
bool* wait_completed,
|
||
|
boost::mutex* wait_completed_mutex);
|
||
|
|
||
|
/** This static method runs in its own thread and does the real callback
|
||
|
* handling to avoid load and blocking on the RPC thread. */
|
||
|
static void ProcessCallbacks(util::SynchronizedQueue<CallbackEntry>& callback_queue);
|
||
|
|
||
|
private:
|
||
|
/** Possible states of this object. */
|
||
|
enum State {
|
||
|
IDLE,
|
||
|
WRITES_PENDING,
|
||
|
HAS_FAILED_WRITES,
|
||
|
FINALLY_FAILED
|
||
|
};
|
||
|
|
||
|
/** Contains information about observer who has to be notified once all
|
||
|
* currently pending writes have finished. */
|
||
|
struct WaitForCompletionObserver {
|
||
|
WaitForCompletionObserver(boost::condition* condition_variable,
|
||
|
bool* wait_completed,
|
||
|
boost::mutex* wait_completed_mutex)
|
||
|
: condition_variable(condition_variable),
|
||
|
wait_completed(wait_completed),
|
||
|
wait_completed_mutex(wait_completed_mutex) {
|
||
|
assert(condition_variable && wait_completed && wait_completed_mutex);
|
||
|
}
|
||
|
boost::condition* condition_variable;
|
||
|
bool* wait_completed;
|
||
|
boost::mutex* wait_completed_mutex;
|
||
|
};
|
||
|
|
||
|
/** Implements callback for an async write request. This method just enqueues
|
||
|
* data. The actual handling of the callback is done by another thread via
|
||
|
* HandleCallback(). */
|
||
|
virtual void CallFinished(xtreemfs::pbrpc::OSDWriteResponse* response_message,
|
||
|
char* data,
|
||
|
uint32_t data_length,
|
||
|
xtreemfs::pbrpc::RPCHeader::ErrorResponse* error,
|
||
|
void* context);
|
||
|
|
||
|
/** Implements callback handling for an async write request. This method is
|
||
|
* called for all queued callbacks in a separate thread.*/
|
||
|
void HandleCallback(xtreemfs::pbrpc::OSDWriteResponse* response_message,
|
||
|
char* data,
|
||
|
uint32_t data_length,
|
||
|
xtreemfs::pbrpc::RPCHeader::ErrorResponse* error,
|
||
|
void* context);
|
||
|
|
||
|
/** Helper function which adds "write_buffer" to the list writes_in_flight_,
|
||
|
* increases the number of pending bytes and takes care of state changes.
|
||
|
*
|
||
|
* @remark Ownership is not transferred to the caller.
|
||
|
* @remark Requires a lock on mutex_.
|
||
|
*/
|
||
|
void IncreasePendingBytesHelper(AsyncWriteBuffer* write_buffer,
|
||
|
boost::mutex::scoped_lock* lock);
|
||
|
|
||
|
/** Helper function reduces the number of pending bytes and takes care
|
||
|
* of state changes.
|
||
|
* Depending on "delete_buffer" the buffer is deleted or not (which implies
|
||
|
* DeleteBufferHelper must be called later).
|
||
|
*
|
||
|
* @remark Ownership of "write_buffer" is transferred to the caller.
|
||
|
* @remark Requires a lock on mutex_.
|
||
|
*/
|
||
|
void DecreasePendingBytesHelper(AsyncWriteBuffer* write_buffer,
|
||
|
boost::mutex::scoped_lock* lock,
|
||
|
bool delete_buffer);
|
||
|
|
||
|
/** Helper function which removes all leading elements which were flagged
|
||
|
* as successfully sent from writes_in_flight_ and deletes them.
|
||
|
*
|
||
|
* @remark Requires a lock on mutex_.
|
||
|
*/
|
||
|
void DeleteBufferHelper(boost::mutex::scoped_lock* lock);
|
||
|
|
||
|
/** Helper to enter the FINALLY_FAILED state in a thread-safe way. CleanUp
|
||
|
* is done automatically when the last expected Callback arrives.
|
||
|
*/
|
||
|
void FailFinallyHelper();
|
||
|
|
||
|
/** This helper method is used to clean up after the AsyncWriteHandler
|
||
|
* reaches the finally failed state. So all write buffers are deleted,
|
||
|
* and waiting threads are notified.
|
||
|
*/
|
||
|
void CleanUp(boost::mutex::scoped_lock* lock);
|
||
|
|
||
|
/** This method is used to repeat failed writes which already are in the list
|
||
|
* of writes in flight. It bypasses the writeahead limitations.
|
||
|
*/
|
||
|
void ReWrite(AsyncWriteBuffer* write_buffer,
|
||
|
boost::mutex::scoped_lock* lock);
|
||
|
|
||
|
/** Common code, used by Write and ReWrite.
|
||
|
* Pay attention to the locking semantics:
|
||
|
* In case of a write (is_rewrite == false), WriteCommon() expects to be
|
||
|
* called from an unlocked context. In case of a rewrite, the opposite
|
||
|
* applies.
|
||
|
*/
|
||
|
void WriteCommon(AsyncWriteBuffer* write_buffer,
|
||
|
boost::mutex::scoped_lock* lock,
|
||
|
bool is_rewrite);
|
||
|
|
||
|
/** Calls notify_one() on all observers in waiting_observers_, frees each
|
||
|
* element in the list and clears the list afterwards.
|
||
|
*
|
||
|
* @remark Requires a lock on mutex_.
|
||
|
*/
|
||
|
void NotifyWaitingObserversAndClearAll(boost::mutex::scoped_lock* lock);
|
||
|
|
||
|
/** Use this when modifying the object. */
|
||
|
boost::mutex mutex_;
|
||
|
|
||
|
/** State of this object. */
|
||
|
State state_;
|
||
|
|
||
|
/** List of pending writes. */
|
||
|
std::list<AsyncWriteBuffer*> writes_in_flight_;
|
||
|
|
||
|
/** Number of pending bytes. */
|
||
|
int pending_bytes_;
|
||
|
|
||
|
/** Number of pending write requests
|
||
|
* NOTE: this does not equal writes_in_flight_.size(), since it also contains
|
||
|
* successfully sent entries which must be kept for consistent retries in
|
||
|
* case of failure. */
|
||
|
int pending_writes_;
|
||
|
|
||
|
/** Set by WaitForPendingWrites{NonBlocking}() to true if there are
|
||
|
* temporarily no new async writes allowed and will be set to false again
|
||
|
* once the state IDLE is reached. */
|
||
|
bool writing_paused_;
|
||
|
|
||
|
/** Used to notify blocked WaitForPendingWrites() callers for the state change
|
||
|
* back to IDLE. */
|
||
|
boost::condition all_pending_writes_did_complete_;
|
||
|
|
||
|
/** Number of threads blocked by WaitForPendingWrites() waiting on
|
||
|
* all_pending_writes_did_complete_ for a state change back to IDLE.
|
||
|
*
|
||
|
* This does not include the number of waiting threads which did call
|
||
|
* WaitForPendingWritesNonBlocking(). Therefore, see "waiting_observers_".
|
||
|
* The total number of all waiting threads is:
|
||
|
* waiting_blocking_threads_count_ + waiting_observers_.size()
|
||
|
*/
|
||
|
int waiting_blocking_threads_count_;
|
||
|
|
||
|
/** Used to notify blocked Write() callers that the number of pending bytes
|
||
|
* has decreased. */
|
||
|
boost::condition pending_bytes_were_decreased_;
|
||
|
|
||
|
/** List of WaitForPendingWritesNonBlocking() observers (specified by their
|
||
|
* boost::condition variable and their bool value which will be set to true
|
||
|
* if the state changed back to IDLE). */
|
||
|
std::list<WaitForCompletionObserver*> waiting_observers_;
|
||
|
|
||
|
/** FileInfo object to which this AsyncWriteHandler does belong. Accessed for
|
||
|
* file size updates. */
|
||
|
FileInfo* file_info_;
|
||
|
|
||
|
/** Pointer to the UUIDIterator of the FileInfo object. */
|
||
|
UUIDIterator* uuid_iterator_;
|
||
|
|
||
|
/** Required for resolving UUIDs to addresses. */
|
||
|
UUIDResolver* uuid_resolver_;
|
||
|
|
||
|
/** Options (Max retries, ...) used when resolving UUIDs. */
|
||
|
RPCOptions uuid_resolver_options_;
|
||
|
|
||
|
/** Client which is used to send out the writes. */
|
||
|
xtreemfs::pbrpc::OSDServiceClient* osd_service_client_;
|
||
|
|
||
|
/** Auth needed for ServiceClients. Always set to AUTH_NONE by Volume. */
|
||
|
const xtreemfs::pbrpc::Auth& auth_bogus_;
|
||
|
|
||
|
/** For same reason needed as auth_bogus_. Always set to user "xtreemfs". */
|
||
|
const xtreemfs::pbrpc::UserCredentials& user_credentials_bogus_;
|
||
|
|
||
|
const Options& volume_options_;
|
||
|
|
||
|
/** Maximum number in bytes which may be pending. */
|
||
|
const int max_writeahead_;
|
||
|
|
||
|
/** Maximum number of pending write requests. */
|
||
|
const int max_requests_;
|
||
|
|
||
|
/** Maximum number of attempts a write will be tried. */
|
||
|
const int max_write_tries_;
|
||
|
|
||
|
/** True after the first redirct, set back to false on error resolution */
|
||
|
bool redirected_;
|
||
|
|
||
|
/** Set to true in when redirected is set true for the first time. The retries
|
||
|
* wont be delayed if true. */
|
||
|
bool fast_redirect_;
|
||
|
|
||
|
/** A copy of the worst error which was detected. It determines the error
|
||
|
* handling. */
|
||
|
xtreemfs::pbrpc::RPCHeader::ErrorResponse worst_error_;
|
||
|
|
||
|
/** The write buffer to whom the worst_error_ belongs. */
|
||
|
AsyncWriteBuffer* worst_write_buffer_;
|
||
|
|
||
|
/** Used by CallFinished (enqueue) */
|
||
|
util::SynchronizedQueue<CallbackEntry>& callback_queue_;
|
||
|
};
|
||
|
|
||
|
} // namespace xtreemfs
|
||
|
|
||
|
#endif // CPP_INCLUDE_LIBXTREEMFS_ASYNC_WRITE_HANDLER_H_
|