2019-01-07 14:06:15 +01:00
/*
* Copyright ( C ) 2011 Andrea Mazzoleni
*
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation , either version 3 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program . If not , see < http : //www.gnu.org/licenses/>.
*/
# include "portable.h"
# include "support.h"
# include "elem.h"
# include "state.h"
# include "parity.h"
# include "handle.h"
# include "io.h"
# include "raid/raid.h"
/****************************************************************************/
/* hash */
static int state_hash_process ( struct snapraid_state * state , block_off_t blockstart , block_off_t blockmax , int * skip_sync )
{
struct snapraid_handle * handle ;
unsigned diskmax ;
block_off_t i ;
unsigned j ;
void * buffer ;
void * buffer_alloc ;
data_off_t countsize ;
block_off_t countpos ;
block_off_t countmax ;
int ret ;
unsigned error ;
unsigned silent_error ;
unsigned io_error ;
char esc_buffer [ ESC_MAX ] ;
/* maps the disks to handles */
handle = handle_mapping ( state , & diskmax ) ;
/* buffer for reading */
buffer = malloc_nofail_direct ( state - > block_size , & buffer_alloc ) ;
if ( ! state - > opt . skip_self )
mtest_vector ( 1 , state - > block_size , & buffer ) ;
error = 0 ;
silent_error = 0 ;
io_error = 0 ;
/* first count the number of blocks to process */
countmax = 0 ;
for ( j = 0 ; j < diskmax ; + + j ) {
struct snapraid_disk * disk = handle [ j ] . disk ;
/* if no disk, nothing to check */
if ( ! disk )
continue ;
for ( i = blockstart ; i < blockmax ; + + i ) {
struct snapraid_block * block ;
unsigned block_state ;
block = fs_par2block_find ( disk , i ) ;
/* get the state of the block */
block_state = block_state_get ( block ) ;
/* process REP and CHG blocks */
if ( block_state ! = BLOCK_STATE_REP & & block_state ! = BLOCK_STATE_CHG )
continue ;
+ + countmax ;
}
}
/* drop until now */
state_usage_waste ( state ) ;
countsize = 0 ;
countpos = 0 ;
if ( ! state_progress_begin ( state , blockstart , blockmax , countmax ) )
goto end ;
for ( j = 0 ; j < diskmax ; + + j ) {
struct snapraid_disk * disk = handle [ j ] . disk ;
/* if no disk, nothing to check */
if ( ! disk )
continue ;
for ( i = blockstart ; i < blockmax ; + + i ) {
snapraid_info info ;
int rehash ;
struct snapraid_block * block ;
int read_size ;
unsigned char hash [ HASH_MAX ] ;
unsigned block_state ;
struct snapraid_file * file ;
block_off_t file_pos ;
block = fs_par2block_find ( disk , i ) ;
/* get the state of the block */
block_state = block_state_get ( block ) ;
/* process REP and CHG blocks */
if ( block_state ! = BLOCK_STATE_REP & & block_state ! = BLOCK_STATE_CHG )
continue ;
/* get the file of this block */
file = fs_par2file_get ( disk , i , & file_pos ) ;
/* get block specific info */
info = info_get ( & state - > infoarr , i ) ;
/* if we have to use the old hash */
rehash = info_get_rehash ( info ) ;
/* until now is misc */
state_usage_misc ( state ) ;
/* if the file is different than the current one, close it */
if ( handle [ j ] . file ! = 0 & & handle [ j ] . file ! = file ) {
/* keep a pointer at the file we are going to close for error reporting */
struct snapraid_file * report = handle [ j ] . file ;
ret = handle_close ( & handle [ j ] ) ;
if ( ret = = - 1 ) {
/* LCOV_EXCL_START */
/* This one is really an unexpected error, because we are only reading */
/* and closing a descriptor should never fail */
if ( errno = = EIO ) {
log_tag ( " error:%u:%s:%s: Close EIO error. %s \n " , i , disk - > name , esc_tag ( report - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " DANGER! Unexpected input/output close error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that disk '%s' is sane and that file '%s' can be accessed. \n " , disk - > dir , handle [ j ] . path ) ;
log_fatal ( " Stopping at block %u \n " , i ) ;
+ + io_error ;
goto bail ;
}
log_tag ( " error:%u:%s:%s: Close error. %s \n " , i , disk - > name , esc_tag ( report - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " WARNING! Unexpected close error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that file '%s' can be accessed. \n " , handle [ j ] . path ) ;
log_fatal ( " Stopping at block %u \n " , i ) ;
+ + error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
}
ret = handle_open ( & handle [ j ] , file , state - > file_mode , log_error , 0 ) ;
if ( ret = = - 1 ) {
if ( errno = = EIO ) {
/* LCOV_EXCL_START */
log_tag ( " error:%u:%s:%s: Open EIO error. %s \n " , i , disk - > name , esc_tag ( file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " DANGER! Unexpected input/output open error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that disk '%s' is sane and that file '%s' can be accessed. \n " , disk - > dir , handle [ j ] . path ) ;
log_fatal ( " Stopping at block %u \n " , i ) ;
+ + io_error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
if ( errno = = ENOENT ) {
log_tag ( " error:%u:%s:%s: Open ENOENT error. %s \n " , i , disk - > name , esc_tag ( file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_error ( " Missing file '%s'. \n " , handle [ j ] . path ) ;
log_error ( " WARNING! You cannot modify data disk during a sync. \n " ) ;
log_error ( " Rerun the sync command when finished. \n " ) ;
+ + error ;
/* if the file is missing, it means that it was removed during sync */
/* this isn't a serious error, so we skip this block, and continue with others */
continue ;
}
if ( errno = = EACCES ) {
log_tag ( " error:%u:%s:%s: Open EACCES error. %s \n " , i , disk - > name , esc_tag ( file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_error ( " No access at file '%s'. \n " , handle [ j ] . path ) ;
log_error ( " WARNING! Please fix the access permission in the data disk. \n " ) ;
log_error ( " Rerun the sync command when finished. \n " ) ;
+ + error ;
/* this isn't a serious error, so we skip this block, and continue with others */
continue ;
}
/* LCOV_EXCL_START */
log_tag ( " error:%u:%s:%s: Open error. %s \n " , i , disk - > name , esc_tag ( file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " WARNING! Unexpected open error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that file '%s' can be accessed. \n " , handle [ j ] . path ) ;
log_fatal ( " Stopping to allow recovery. Try with 'snapraid check -f /%s' \n " , fmt_poll ( disk , file - > sub , esc_buffer ) ) ;
+ + error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
/* check if the file is changed */
if ( handle [ j ] . st . st_size ! = file - > size
| | handle [ j ] . st . st_mtime ! = file - > mtime_sec
| | STAT_NSEC ( & handle [ j ] . st ) ! = file - > mtime_nsec
| | handle [ j ] . st . st_ino ! = file - > inode
) {
log_tag ( " error:%u:%s:%s: Unexpected attribute change \n " , i , disk - > name , esc_tag ( file - > sub , esc_buffer ) ) ;
if ( handle [ j ] . st . st_size ! = file - > size ) {
log_error ( " Unexpected size change at file '%s' from % " PRIu64 " to % " PRIu64 " . \n " , handle [ j ] . path , file - > size , ( uint64_t ) handle [ j ] . st . st_size ) ;
} else if ( handle [ j ] . st . st_mtime ! = file - > mtime_sec
| | STAT_NSEC ( & handle [ j ] . st ) ! = file - > mtime_nsec ) {
log_error ( " Unexpected time change at file '%s' from % " PRIu64 " .%d to % " PRIu64 " .%d. \n " , handle [ j ] . path , file - > mtime_sec , file - > mtime_nsec , ( uint64_t ) handle [ j ] . st . st_mtime , STAT_NSEC ( & handle [ j ] . st ) ) ;
} else {
log_error ( " Unexpected inode change from % " PRIu64 " to % " PRIu64 " at file '%s'. \n " , file - > inode , ( uint64_t ) handle [ j ] . st . st_ino , handle [ j ] . path ) ;
}
log_error ( " WARNING! You cannot modify files during a sync. \n " ) ;
log_error ( " Rerun the sync command when finished. \n " ) ;
+ + error ;
/* if the file is changed, it means that it was modified during sync */
/* this isn't a serious error, so we skip this block, and continue with others */
continue ;
}
read_size = handle_read ( & handle [ j ] , file_pos , buffer , state - > block_size , log_fatal , 0 ) ;
if ( read_size = = - 1 ) {
/* LCOV_EXCL_START */
if ( errno = = EIO ) {
log_tag ( " error:%u:%s:%s: Read EIO error at position %u. %s \n " , i , disk - > name , esc_tag ( file - > sub , esc_buffer ) , file_pos , strerror ( errno ) ) ;
log_fatal ( " DANGER! Unexpected input/output read error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that disk '%s' is sane and that file '%s' can be read. \n " , disk - > dir , handle [ j ] . path ) ;
log_fatal ( " Stopping at block %u \n " , i ) ;
+ + io_error ;
goto bail ;
}
log_tag ( " error:%u:%s:%s: Read error at position %u. %s \n " , i , disk - > name , esc_tag ( file - > sub , esc_buffer ) , file_pos , strerror ( errno ) ) ;
log_fatal ( " WARNING! Unexpected read error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that file '%s' can be read. \n " , handle [ j ] . path ) ;
log_fatal ( " Stopping to allow recovery. Try with 'snapraid check -f /%s' \n " , fmt_poll ( disk , file - > sub , esc_buffer ) ) ;
+ + error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
/* until now is disk */
state_usage_disk ( state , handle , & j , 1 ) ;
state_usage_file ( state , disk , file ) ;
countsize + = read_size ;
/* now compute the hash */
if ( rehash ) {
memhash ( state - > prevhash , state - > prevhashseed , hash , buffer , read_size ) ;
} else {
memhash ( state - > hash , state - > hashseed , hash , buffer , read_size ) ;
}
/* until now is hash */
state_usage_hash ( state ) ;
if ( block_state = = BLOCK_STATE_REP ) {
/* compare the hash */
if ( memcmp ( hash , block - > hash , BLOCK_HASH_SIZE ) ! = 0 ) {
log_tag ( " error:%u:%s:%s: Unexpected data change \n " , i , disk - > name , esc_tag ( file - > sub , esc_buffer ) ) ;
log_error ( " Data change at file '%s' at position '%u' \n " , handle [ j ] . path , file_pos ) ;
log_error ( " WARNING! Unexpected data modification of a file without parity! \n " ) ;
if ( file_flag_has ( file , FILE_IS_COPY ) ) {
log_error ( " This file was detected as a copy of another file with the same name, size, \n " ) ;
log_error ( " and timestamp, but the file data isn't matching the assumed copy. \n " ) ;
log_error ( " If this is a false positive, and the files are expected to be different, \n " ) ;
log_error ( " you can 'sync' anyway using 'snapraid --force-nocopy sync' \n " ) ;
} else {
log_error ( " Try removing the file from the array and rerun the 'sync' command! \n " ) ;
}
/* block sync to allow a recovery before overwriting */
/* the parity needed to make such recovery */
* skip_sync = 1 ; /* avoid to run the next sync */
+ + silent_error ;
continue ;
}
} else {
/* the only other case is BLOCK_STATE_CHG */
assert ( block_state = = BLOCK_STATE_CHG ) ;
/* copy the hash in the block */
memcpy ( block - > hash , hash , BLOCK_HASH_SIZE ) ;
/* and mark the block as hashed */
block_state_set ( block , BLOCK_STATE_REP ) ;
/* mark the state as needing write */
state - > need_write = 1 ;
}
/* count the number of processed block */
+ + countpos ;
/* progress */
if ( state_progress ( state , 0 , i , countpos , countmax , countsize ) ) {
/* LCOV_EXCL_START */
* skip_sync = 1 ; /* avoid to run the next sync */
break ;
/* LCOV_EXCL_STOP */
}
}
/* close the last file in the disk */
if ( handle [ j ] . file ! = 0 ) {
/* keep a pointer at the file we are going to close for error reporting */
struct snapraid_file * report = handle [ j ] . file ;
ret = handle_close ( & handle [ j ] ) ;
if ( ret = = - 1 ) {
/* LCOV_EXCL_START */
/* This one is really an unexpected error, because we are only reading */
/* and closing a descriptor should never fail */
if ( errno = = EIO ) {
log_tag ( " error:%u:%s:%s: Close EIO error. %s \n " , blockmax , disk - > name , esc_tag ( report - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " DANGER! Unexpected input/output close error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that disk '%s' is sane and that file '%s' can be accessed. \n " , disk - > dir , handle [ j ] . path ) ;
log_fatal ( " Stopping at block %u \n " , blockmax ) ;
+ + io_error ;
goto bail ;
}
log_tag ( " error:%u:%s:%s: Close error. %s \n " , blockmax , disk - > name , esc_tag ( report - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " WARNING! Unexpected close error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that file '%s' can be accessed. \n " , handle [ j ] . path ) ;
log_fatal ( " Stopping at block %u \n " , blockmax ) ;
+ + error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
}
}
end :
state_progress_end ( state , countpos , countmax , countsize ) ;
/* note that at this point no io_error is possible */
/* because at the first one we bail out */
assert ( io_error = = 0 ) ;
if ( error | | io_error | | silent_error ) {
msg_status ( " \n " ) ;
msg_status ( " %8u file errors \n " , error ) ;
msg_status ( " %8u io errors \n " , io_error ) ;
msg_status ( " %8u data errors \n " , silent_error ) ;
} else {
/* print the result only if processed something */
if ( countpos ! = 0 )
msg_status ( " Everything OK \n " ) ;
}
if ( error )
log_fatal ( " WARNING! Unexpected file errors! \n " ) ;
log_tag ( " hash_summary:error_file:%u \n " , error ) ;
/* proceed without bailing out */
goto finish ;
bail :
/* on bail, don't run the next sync */
* skip_sync = 1 ;
/* close files left open */
for ( j = 0 ; j < diskmax ; + + j ) {
struct snapraid_file * file = handle [ j ] . file ;
struct snapraid_disk * disk = handle [ j ] . disk ;
ret = handle_close ( & handle [ j ] ) ;
if ( ret = = - 1 ) {
log_tag ( " error:%u:%s:%s: Close error. %s \n " , i , disk - > name , esc_tag ( file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " DANGER! Unexpected close error in a data disk. \n " ) ;
+ + error ;
/* continue, as we are already exiting */
}
}
finish :
free ( handle ) ;
free ( buffer_alloc ) ;
if ( error + io_error + silent_error ! = 0 )
return - 1 ;
return 0 ;
}
/****************************************************************************/
/* sync */
/**
* Sync plan to use.
*
* It's the context passed as opaque pointer to block_is_enabled()
* to decide which block positions have to be processed.
*/
struct snapraid_plan {
unsigned handle_max ; /**< Number of entries in ::handle_map. */
struct snapraid_handle * handle_map ; /**< Vector of handles to check. Entries without a disk are ignored. */
int force_full ; /**< If not zero, every position of an existing disk is considered as having invalid parity. */
} ;
/**
 * A block that failed the hash check, or that was deleted.
 */
struct failed_struct {
	unsigned index; /**< Index of the disk containing the failed block. */
	unsigned size; /**< Size of the block. */
	struct snapraid_block *block; /**< The failed block, or BLOCK_DELETED for a deleted block */
};

/**
 * Three-way comparison of two ::failed_struct by their index.
 *
 * The signature is the one of a qsort() comparison function.
 * Returns -1, 0 or 1 if the index of the first element is respectively
 * lower, equal or greater than the index of the second one.
 */
int failed_compare_by_index(const void *void_a, const void *void_b)
{
	const struct failed_struct *lhs = void_a;
	const struct failed_struct *rhs = void_b;

	/* each comparison gives 0 or 1, and the difference is the sign */
	return (lhs->index > rhs->index) - (lhs->index < rhs->index);
}
/**
* Buffer for storing the new hashes.
*
* state_sync_process() allocates a vector of them, with one entry for each disk.
*/
struct snapraid_rehash {
unsigned char hash [ HASH_MAX ] ; /**< Storage for the new hash. */
struct snapraid_block * block ; /**< Block the entry refers to. It's reset to 0 by state_sync_process() to mean no rehash for the disk. */
} ;
/**
 * Tells if the block position ::i has to be processed.
 *
 * ::void_plan is the ::snapraid_plan to use.
 *
 * A position is processed only if, looking at all the disks of the plan,
 * at least one block has a file and at least one block has invalid parity.
 * When the plan has ::force_full set, every block of an existing disk
 * counts as having invalid parity.
 *
 * Returns 1 if the position has to be processed, 0 otherwise.
 */
static int block_is_enabled(void *void_plan, block_off_t i)
{
	struct snapraid_plan *plan = void_plan;
	int has_file = 0;
	int has_invalid = 0;
	unsigned d;

	/* for each disk */
	for (d = 0; d < plan->handle_max; ++d) {
		struct snapraid_disk *disk = plan->handle_map[d].disk;

		/* only existing disks are checked */
		if (disk) {
			struct snapraid_block *block = fs_par2block_find(disk, i);

			if (block_has_file(block))
				has_file = 1;

			if (block_has_invalid_parity(block) || plan->force_full)
				has_invalid = 1;
		}
	}

	/* we need to update only if there is something valid and something invalid */
	return has_invalid && has_file;
}
static void sync_data_reader ( struct snapraid_worker * worker , struct snapraid_task * task )
{
struct snapraid_io * io = worker - > io ;
struct snapraid_state * state = io - > state ;
struct snapraid_handle * handle = worker - > handle ;
struct snapraid_disk * disk = handle - > disk ;
block_off_t blockcur = task - > position ;
unsigned char * buffer = task - > buffer ;
int ret ;
char esc_buffer [ ESC_MAX ] ;
/* if the disk position is not used */
if ( ! disk ) {
/* use an empty block */
memset ( buffer , 0 , state - > block_size ) ;
task - > state = TASK_STATE_DONE ;
return ;
}
/* get the block */
task - > block = fs_par2block_find ( disk , blockcur ) ;
/* if the block has no file, meaning that it's EMPTY or DELETED, */
/* it doesn't participate in the new parity computation */
if ( ! block_has_file ( task - > block ) ) {
/* use an empty block */
memset ( buffer , 0 , state - > block_size ) ;
task - > state = TASK_STATE_DONE ;
return ;
}
/* get the file of this block */
task - > file = fs_par2file_get ( disk , blockcur , & task - > file_pos ) ;
/* if the file is different than the current one, close it */
if ( handle - > file ! = 0 & & handle - > file ! = task - > file ) {
/* keep a pointer at the file we are going to close for error reporting */
struct snapraid_file * report = handle - > file ;
ret = handle_close ( handle ) ;
if ( ret = = - 1 ) {
/* LCOV_EXCL_START */
/* This one is really an unexpected error, because we are only reading */
/* and closing a descriptor should never fail */
if ( errno = = EIO ) {
log_tag ( " error:%u:%s:%s: Close EIO error. %s \n " , blockcur , disk - > name , esc_tag ( report - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " DANGER! Unexpected input/output close error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that disk '%s' is sane and that file '%s' can be accessed. \n " , disk - > dir , handle - > path ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
task - > state = TASK_STATE_IOERROR ;
return ;
}
log_tag ( " error:%u:%s:%s: Close error. %s \n " , blockcur , disk - > name , esc_tag ( report - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " WARNING! Unexpected close error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that file '%s' can be accessed. \n " , handle - > path ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
task - > state = TASK_STATE_ERROR ;
return ;
/* LCOV_EXCL_STOP */
}
}
ret = handle_open ( handle , task - > file , state - > file_mode , log_error , 0 ) ;
if ( ret = = - 1 ) {
if ( errno = = EIO ) {
/* LCOV_EXCL_START */
log_tag ( " error:%u:%s:%s: Open EIO error. %s \n " , blockcur , disk - > name , esc_tag ( task - > file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " DANGER! Unexpected input/output open error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that disk '%s' is sane and that file '%s' can be accessed. \n " , disk - > dir , handle - > path ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
task - > state = TASK_STATE_IOERROR ;
return ;
/* LCOV_EXCL_STOP */
}
if ( errno = = ENOENT ) {
log_tag ( " error:%u:%s:%s: Open ENOENT error. %s \n " , blockcur , disk - > name , esc_tag ( task - > file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_error ( " Missing file '%s'. \n " , handle - > path ) ;
log_error ( " WARNING! You cannot modify data disk during a sync. \n " ) ;
log_error ( " Rerun the sync command when finished. \n " ) ;
/* if the file is missing, it means that it was removed during sync */
/* this isn't a serious error, so we skip this block, and continue with others */
task - > state = TASK_STATE_ERROR_CONTINUE ;
return ;
}
if ( errno = = EACCES ) {
log_tag ( " error:%u:%s:%s: Open EACCES error. %s \n " , blockcur , disk - > name , esc_tag ( task - > file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_error ( " No access at file '%s'. \n " , handle - > path ) ;
log_error ( " WARNING! Please fix the access permission in the data disk. \n " ) ;
log_error ( " Rerun the sync command when finished. \n " ) ;
/* this isn't a serious error, so we skip this block, and continue with others */
task - > state = TASK_STATE_ERROR_CONTINUE ;
return ;
}
/* LCOV_EXCL_START */
log_tag ( " error:%u:%s:%s: Open error. %s \n " , blockcur , disk - > name , esc_tag ( task - > file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " WARNING! Unexpected open error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that file '%s' can be accessed. \n " , handle - > path ) ;
log_fatal ( " Stopping to allow recovery. Try with 'snapraid check -f /%s' \n " , fmt_poll ( disk , task - > file - > sub , esc_buffer ) ) ;
task - > state = TASK_STATE_ERROR ;
return ;
/* LCOV_EXCL_STOP */
}
/* check if the file is changed */
if ( handle - > st . st_size ! = task - > file - > size
| | handle - > st . st_mtime ! = task - > file - > mtime_sec
| | STAT_NSEC ( & handle - > st ) ! = task - > file - > mtime_nsec
| | handle - > st . st_ino ! = task - > file - > inode
) {
log_tag ( " error:%u:%s:%s: Unexpected attribute change \n " , blockcur , disk - > name , esc_tag ( task - > file - > sub , esc_buffer ) ) ;
if ( handle - > st . st_size ! = task - > file - > size ) {
log_error ( " Unexpected size change at file '%s' from % " PRIu64 " to % " PRIu64 " . \n " , handle - > path , task - > file - > size , ( uint64_t ) handle - > st . st_size ) ;
} else if ( handle - > st . st_mtime ! = task - > file - > mtime_sec
| | STAT_NSEC ( & handle - > st ) ! = task - > file - > mtime_nsec ) {
log_error ( " Unexpected time change at file '%s' from % " PRIu64 " .%d to % " PRIu64 " .%d. \n " , handle - > path , task - > file - > mtime_sec , task - > file - > mtime_nsec , ( uint64_t ) handle - > st . st_mtime , STAT_NSEC ( & handle - > st ) ) ;
} else {
log_error ( " Unexpected inode change from % " PRIu64 " to % " PRIu64 " at file '%s'. \n " , task - > file - > inode , ( uint64_t ) handle - > st . st_ino , handle - > path ) ;
}
log_error ( " WARNING! You cannot modify files during a sync. \n " ) ;
log_error ( " Rerun the sync command when finished. \n " ) ;
/* if the file is changed, it means that it was modified during sync */
/* this isn't a serious error, so we skip this block, and continue with others */
task - > state = TASK_STATE_ERROR_CONTINUE ;
return ;
}
task - > read_size = handle_read ( handle , task - > file_pos , buffer , state - > block_size , log_error , 0 ) ;
if ( task - > read_size = = - 1 ) {
/* LCOV_EXCL_START */
if ( errno = = EIO ) {
log_tag ( " error:%u:%s:%s: Read EIO error at position %u. %s \n " , blockcur , disk - > name , esc_tag ( task - > file - > sub , esc_buffer ) , task - > file_pos , strerror ( errno ) ) ;
log_error ( " Input/Output error in file '%s' at position '%u' \n " , handle - > path , task - > file_pos ) ;
task - > state = TASK_STATE_IOERROR_CONTINUE ;
return ;
}
log_tag ( " error:%u:%s:%s: Read error at position %u. %s \n " , blockcur , disk - > name , esc_tag ( task - > file - > sub , esc_buffer ) , task - > file_pos , strerror ( errno ) ) ;
log_fatal ( " WARNING! Unexpected read error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that file '%s' can be read. \n " , handle - > path ) ;
log_fatal ( " Stopping to allow recovery. Try with 'snapraid check -f /%s' \n " , fmt_poll ( disk , task - > file - > sub , esc_buffer ) ) ;
task - > state = TASK_STATE_ERROR ;
return ;
/* LCOV_EXCL_STOP */
}
/* store the path of the opened file */
pathcpy ( task - > path , sizeof ( task - > path ) , handle - > path ) ;
task - > state = TASK_STATE_DONE ;
}
static void sync_parity_writer ( struct snapraid_worker * worker , struct snapraid_task * task )
{
struct snapraid_io * io = worker - > io ;
struct snapraid_state * state = io - > state ;
struct snapraid_parity_handle * parity_handle = worker - > parity_handle ;
unsigned level = parity_handle - > level ;
block_off_t blockcur = task - > position ;
unsigned char * buffer = task - > buffer ;
int ret ;
/* write parity */
ret = parity_write ( parity_handle , blockcur , buffer , state - > block_size ) ;
if ( ret = = - 1 ) {
/* LCOV_EXCL_START */
if ( errno = = EIO ) {
log_tag ( " parity_error:%u:%s: Write EIO error. %s \n " , blockcur , lev_config_name ( level ) , strerror ( errno ) ) ;
log_error ( " Input/Output error in parity '%s' at position '%u' \n " , lev_config_name ( level ) , blockcur ) ;
task - > state = TASK_STATE_IOERROR_CONTINUE ;
return ;
}
log_tag ( " parity_error:%u:%s: Write error. %s \n " , blockcur , lev_config_name ( level ) , strerror ( errno ) ) ;
log_fatal ( " WARNING! Unexpected write error in the %s disk, it isn't possible to sync. \n " , lev_name ( level ) ) ;
log_fatal ( " Ensure that disk '%s' has some free space available. \n " , lev_config_name ( level ) ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
task - > state = TASK_STATE_ERROR ;
return ;
/* LCOV_EXCL_STOP */
}
task - > state = TASK_STATE_DONE ;
}
static int state_sync_process ( struct snapraid_state * state , struct snapraid_parity_handle * parity_handle , block_off_t blockstart , block_off_t blockmax )
{
struct snapraid_io io ;
struct snapraid_plan plan ;
struct snapraid_handle * handle ;
void * rehandle_alloc ;
struct snapraid_rehash * rehandle ;
unsigned diskmax ;
block_off_t blockcur ;
unsigned j ;
void * zero_alloc ;
void * * zero ;
void * copy_alloc ;
void * * copy ;
unsigned buffermax ;
data_off_t countsize ;
block_off_t countpos ;
block_off_t countmax ;
block_off_t autosavedone ;
block_off_t autosavelimit ;
block_off_t autosavemissing ;
int ret ;
unsigned error ;
unsigned silent_error ;
unsigned io_error ;
time_t now ;
struct failed_struct * failed ;
int * failed_map ;
unsigned l ;
unsigned * waiting_map ;
unsigned waiting_mac ;
char esc_buffer [ ESC_MAX ] ;
/* the sync process assumes that all the hashes are correct */
/* including the ones from CHG and DELETED blocks */
assert ( state - > clear_past_hash ! = 0 ) ;
/* get the present time */
now = time ( 0 ) ;
/* maps the disks to handles */
handle = handle_mapping ( state , & diskmax ) ;
/* rehash buffers */
rehandle = malloc_nofail_align ( diskmax * sizeof ( struct snapraid_rehash ) , & rehandle_alloc ) ;
/* we need 1 * data + 1 * parity */
buffermax = diskmax + state - > level ;
/* initialize the io threads */
io_init ( & io , state , state - > opt . io_cache , buffermax , sync_data_reader , handle , diskmax , 0 , sync_parity_writer , parity_handle , state - > level ) ;
/* allocate the copy buffer */
copy = malloc_nofail_vector_align ( diskmax , diskmax , state - > block_size , & copy_alloc ) ;
/* allocate and fill the zero buffer */
zero = malloc_nofail_align ( state - > block_size , & zero_alloc ) ;
memset ( zero , 0 , state - > block_size ) ;
raid_zero ( zero ) ;
failed = malloc_nofail ( diskmax * sizeof ( struct failed_struct ) ) ;
failed_map = malloc_nofail ( diskmax * sizeof ( unsigned ) ) ;
/* possibly waiting disks */
waiting_mac = diskmax > RAID_PARITY_MAX ? diskmax : RAID_PARITY_MAX ;
waiting_map = malloc_nofail ( waiting_mac * sizeof ( unsigned ) ) ;
error = 0 ;
silent_error = 0 ;
io_error = 0 ;
/* first count the number of blocks to process */
countmax = 0 ;
plan . handle_max = diskmax ;
plan . handle_map = handle ;
plan . force_full = state - > opt . force_full ;
for ( blockcur = blockstart ; blockcur < blockmax ; + + blockcur ) {
if ( ! block_is_enabled ( & plan , blockcur ) )
continue ;
+ + countmax ;
}
/* compute the autosave size for all disk, even if not read */
/* this makes sense because the speed should be almost the same */
/* if the disks are read in parallel */
autosavelimit = state - > autosave / ( diskmax * state - > block_size ) ;
autosavemissing = countmax ; /* blocks to do */
autosavedone = 0 ; /* blocks done */
/* drop until now */
state_usage_waste ( state ) ;
countsize = 0 ;
countpos = 0 ;
/* start all the worker threads */
io_start ( & io , blockstart , blockmax , & block_is_enabled , & plan ) ;
if ( ! state_progress_begin ( state , blockstart , blockmax , countmax ) )
goto end ;
while ( 1 ) {
unsigned failed_count ;
int error_on_this_block ;
int silent_error_on_this_block ;
int io_error_on_this_block ;
int fixed_error_on_this_block ;
int parity_needs_to_be_updated ;
int parity_going_to_be_updated ;
snapraid_info info ;
int rehash ;
void * * buffer ;
int writer_error [ IO_WRITER_ERROR_MAX ] ;
/* go to the next block */
blockcur = io_read_next ( & io , & buffer ) ;
if ( blockcur > = blockmax )
break ;
/* until now is scheduling */
state_usage_sched ( state ) ;
/* one more block processed for autosave */
+ + autosavedone ;
- - autosavemissing ;
/* by default process the block, and skip it if something goes wrong */
error_on_this_block = 0 ;
silent_error_on_this_block = 0 ;
io_error_on_this_block = 0 ;
fixed_error_on_this_block = 0 ;
/* keep track of the number of failed blocks */
failed_count = 0 ;
/* get block specific info */
info = info_get ( & state - > infoarr , blockcur ) ;
/* if we have to use the old hash */
rehash = info_get_rehash ( info ) ;
/* if the parity requires to be updated */
/* It could happens that all the blocks are EMPTY/BLK and CHG but with the hash */
/* still matching because the specific CHG block was not modified. */
/* In such case, we can avoid to update parity, because it would be the same as before */
/* Note that CHG/DELETED blocks already present in the content file loaded */
/* have the hash cleared (::clear_past_hash flag), and then they won't never match the hash. */
/* We are treating only CHG blocks created at runtime. */
parity_needs_to_be_updated = state - > opt . force_full | | state - > opt . force_parity_update ;
/* if the parity is going to be updated */
parity_going_to_be_updated = 0 ;
/* if the block is marked as bad, we force the parity update */
/* because the bad block may be the result of a wrong parity */
if ( info_get_bad ( info ) )
parity_needs_to_be_updated = 1 ;
/* for each disk, process the block */
for ( j = 0 ; j < diskmax ; + + j ) {
struct snapraid_task * task ;
int read_size ;
unsigned char hash [ HASH_MAX ] ;
struct snapraid_block * block ;
unsigned block_state ;
struct snapraid_disk * disk ;
struct snapraid_file * file ;
block_off_t file_pos ;
unsigned diskcur ;
/* until now is misc */
state_usage_misc ( state ) ;
task = io_data_read ( & io , & diskcur , waiting_map , & waiting_mac ) ;
/* until now is disk */
state_usage_disk ( state , handle , waiting_map , waiting_mac ) ;
/* get the results */
disk = task - > disk ;
block = task - > block ;
file = task - > file ;
file_pos = task - > file_pos ;
read_size = task - > read_size ;
/* by default no rehash in case of "continue" */
rehandle [ diskcur ] . block = 0 ;
/* if the disk position is not used */
if ( ! disk )
continue ;
state_usage_file ( state , disk , file ) ;
/* get the state of the block */
block_state = block_state_get ( block ) ;
/* if the block has invalid parity, */
/* we have to take care of it in case of recover */
if ( block_has_invalid_parity ( block ) ) {
/* store it in the failed set, because */
/* the parity may be still computed with the previous content */
failed [ failed_count ] . index = diskcur ;
failed [ failed_count ] . size = state - > block_size ;
failed [ failed_count ] . block = block ;
+ + failed_count ;
/* if the block has invalid parity, we have to update the parity */
/* to include this block change */
/* This also apply to CHG blocks, but we are going to handle */
/* later this case to do the updates only if really needed */
if ( block_state ! = BLOCK_STATE_CHG )
parity_needs_to_be_updated = 1 ;
/* note that DELETE blocks are skipped in the next check */
/* and we have to store them in the failed blocks */
/* before skipping */
/* follow */
}
/* if the block is not used */
if ( ! block_has_file ( block ) )
continue ;
/* handle error conditions */
if ( task - > state = = TASK_STATE_IOERROR ) {
/* LCOV_EXCL_START */
+ + io_error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
if ( task - > state = = TASK_STATE_ERROR ) {
/* LCOV_EXCL_START */
+ + error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
if ( task - > state = = TASK_STATE_ERROR_CONTINUE ) {
+ + error ;
error_on_this_block = 1 ;
continue ;
}
if ( task - > state = = TASK_STATE_IOERROR_CONTINUE ) {
+ + io_error ;
if ( io_error > = state - > opt . io_error_limit ) {
/* LCOV_EXCL_START */
log_fatal ( " DANGER! Unexpected input/output read error in a data disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Ensure that disk '%s' is sane and that file '%s' can be read. \n " , disk - > dir , task - > path ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
goto bail ;
/* LCOV_EXCL_STOP */
}
/* otherwise continue */
io_error_on_this_block = 1 ;
continue ;
}
if ( task - > state ! = TASK_STATE_DONE ) {
/* LCOV_EXCL_START */
log_fatal ( " Internal inconsistency in task state \n " ) ;
os_abort ( ) ;
/* LCOV_EXCL_STOP */
}
countsize + = read_size ;
/* now compute the hash */
if ( rehash ) {
memhash ( state - > prevhash , state - > prevhashseed , hash , buffer [ diskcur ] , read_size ) ;
/* compute the new hash, and store it */
rehandle [ diskcur ] . block = block ;
memhash ( state - > hash , state - > hashseed , rehandle [ diskcur ] . hash , buffer [ diskcur ] , read_size ) ;
} else {
memhash ( state - > hash , state - > hashseed , hash , buffer [ diskcur ] , read_size ) ;
}
/* until now is hash */
state_usage_hash ( state ) ;
if ( block_has_updated_hash ( block ) ) {
/* compare the hash */
if ( memcmp ( hash , block - > hash , BLOCK_HASH_SIZE ) ! = 0 ) {
/* if the file has invalid parity, it's a REP changed during the sync */
if ( block_has_invalid_parity ( block ) ) {
log_tag ( " error:%u:%s:%s: Unexpected data change \n " , blockcur , disk - > name , esc_tag ( file - > sub , esc_buffer ) ) ;
log_error ( " Data change at file '%s' at position '%u' \n " , task - > path , file_pos ) ;
log_error ( " WARNING! Unexpected data modification of a file without parity! \n " ) ;
if ( file_flag_has ( file , FILE_IS_COPY ) ) {
log_error ( " This file was detected as a copy of another file with the same name, size, \n " ) ;
log_error ( " and timestamp, but the file data isn't matching the assumed copy. \n " ) ;
log_error ( " If this is a false positive, and the files are expected to be different, \n " ) ;
log_error ( " you can 'sync' anyway using 'snapraid --force-nocopy sync' \n " ) ;
} else {
log_error ( " Try removing the file from the array and rerun the 'sync' command! \n " ) ;
}
+ + error ;
/* if the file is changed, it means that it was modified during sync */
/* this isn't a serious error, so we skip this block, and continue with others */
error_on_this_block = 1 ;
continue ;
} else { /* otherwise it's a BLK with silent error */
unsigned diff = memdiff ( hash , block - > hash , BLOCK_HASH_SIZE ) ;
log_tag ( " error:%u:%s:%s: Data error at position %u, diff bits %u/%u \n " , blockcur , disk - > name , esc_tag ( file - > sub , esc_buffer ) , file_pos , diff , BLOCK_HASH_SIZE * 8 ) ;
log_error ( " Data error in file '%s' at position '%u', diff bits %u/%u \n " , task - > path , file_pos , diff , BLOCK_HASH_SIZE * 8 ) ;
/* save the failed block for the fix */
failed [ failed_count ] . index = diskcur ;
failed [ failed_count ] . size = read_size ;
failed [ failed_count ] . block = block ;
+ + failed_count ;
/* silent errors are very rare, and are not a signal that a disk */
/* is going to fail. So, we just continue marking the block as bad */
/* just like in scrub */
+ + silent_error ;
silent_error_on_this_block = 1 ;
continue ;
}
}
} else {
/* if until now the parity doesn't need to be updated */
if ( ! parity_needs_to_be_updated ) {
/* for sure it's a CHG block, because EMPTY are processed before with "continue" */
/* and BLK and REP have "block_has_updated_hash()" as 1, and all the others */
/* have "parity_needs_to_be_updated" already at 1 */
assert ( block_state_get ( block ) = = BLOCK_STATE_CHG ) ;
/* if the hash represents the data unequivocally */
if ( hash_is_unique ( block - > hash ) ) {
/* check if the hash is changed */
if ( memcmp ( hash , block - > hash , BLOCK_HASH_SIZE ) ! = 0 ) {
/* the block is different, and we must update parity */
parity_needs_to_be_updated = 1 ;
}
} else {
/* if the hash is already invalid, we update parity */
parity_needs_to_be_updated = 1 ;
}
}
/* copy the hash in the block, but doesn't mark the block as hashed */
/* this allow in case of skipped block to do not save the failed computation */
memcpy ( block - > hash , hash , BLOCK_HASH_SIZE ) ;
/* note that in case of rehash, this is the wrong hash, */
/* but it will be overwritten later */
}
}
/* if we have only silent errors we can try to fix them on-the-fly */
/* note the fix is not written to disk, but used only to */
/* compute the new parity */
if ( ! error_on_this_block & & ! io_error_on_this_block & & silent_error_on_this_block ) {
unsigned failed_mac ;
int something_to_recover = 0 ;
/* sort the failed vector */
/* because with threads it may be in any order */
/* but RAID requires the indexes to be sorted */
qsort ( failed , failed_count , sizeof ( failed [ 0 ] ) , failed_compare_by_index ) ;
/* setup the blocks to recover */
failed_mac = 0 ;
for ( j = 0 ; j < failed_count ; + + j ) {
unsigned char * block_buffer = buffer [ failed [ j ] . index ] ;
unsigned char * block_copy = copy [ failed [ j ] . index ] ;
unsigned block_state = block_state_get ( failed [ j ] . block ) ;
/* we try to recover only if at least one BLK is present */
if ( block_state = = BLOCK_STATE_BLK )
something_to_recover = 1 ;
/* save a copy of the content just read */
/* that it's going to be overwritten by the recovering function */
memcpy ( block_copy , block_buffer , state - > block_size ) ;
if ( block_state = = BLOCK_STATE_CHG
& & hash_is_zero ( failed [ j ] . block - > hash )
) {
/* if the block was filled with 0, restore this state */
/* and avoid to recover it */
memset ( block_buffer , 0 , state - > block_size ) ;
} else {
/* if we have too many failures, we cannot recover */
if ( failed_mac > = state - > level )
break ;
/* otherwise it has to be recovered */
failed_map [ failed_mac + + ] = failed [ j ] . index ;
}
}
/* if we have something to recover and enough parity */
if ( something_to_recover & & j = = failed_count ) {
/* until now is misc */
state_usage_misc ( state ) ;
/* read the parity */
/* we are sure that parity exists because */
/* we have at least one BLK block */
for ( l = 0 ; l < state - > level ; + + l ) {
ret = parity_read ( & parity_handle [ l ] , blockcur , buffer [ diskmax + l ] , state - > block_size , log_error ) ;
if ( ret = = - 1 ) {
/* LCOV_EXCL_START */
if ( errno = = EIO ) {
log_tag ( " parity_error:%u:%s: Read EIO error. %s \n " , blockcur , lev_config_name ( l ) , strerror ( errno ) ) ;
if ( io_error > = state - > opt . io_error_limit ) {
log_fatal ( " DANGER! Unexpected input/output read error in the %s disk, it isn't possible to sync. \n " , lev_name ( l ) ) ;
log_fatal ( " Ensure that disk '%s' is sane and can be read. \n " , lev_config_name ( l ) ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
+ + io_error ;
goto bail ;
}
log_error ( " Input/Output error in parity '%s' at position '%u' \n " , lev_config_name ( l ) , blockcur ) ;
+ + io_error ;
io_error_on_this_block = 1 ;
continue ;
}
log_tag ( " parity_error:%u:%s: Read error. %s \n " , blockcur , lev_config_name ( l ) , strerror ( errno ) ) ;
log_fatal ( " WARNING! Unexpected read error in the %s disk, it isn't possible to sync. \n " , lev_name ( l ) ) ;
log_fatal ( " Ensure that disk '%s' can be read. \n " , lev_config_name ( l ) ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
+ + error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
/* until now is parity */
state_usage_parity ( state , & l , 1 ) ;
}
/* if no error in parity read */
if ( ! io_error_on_this_block ) {
/* try to fix the data */
/* note that this is a simple fix algorithm, that doesn't take into */
/* account the case of a wrong parity */
/* only 'fix' supports the most advanced fixing */
raid_rec ( failed_mac , failed_map , diskmax , state - > level , state - > block_size , buffer ) ;
/* until now is raid */
state_usage_raid ( state ) ;
/* check the result and prepare the data */
for ( j = 0 ; j < failed_count ; + + j ) {
unsigned char hash [ HASH_MAX ] ;
unsigned char * block_buffer = buffer [ failed [ j ] . index ] ;
unsigned char * block_copy = copy [ failed [ j ] . index ] ;
unsigned block_state = block_state_get ( failed [ j ] . block ) ;
if ( block_state = = BLOCK_STATE_BLK ) {
unsigned size = failed [ j ] . size ;
/* compute the hash of the recovered block */
if ( rehash ) {
memhash ( state - > prevhash , state - > prevhashseed , hash , block_buffer , size ) ;
} else {
memhash ( state - > hash , state - > hashseed , hash , block_buffer , size ) ;
}
/* until now is hash */
state_usage_hash ( state ) ;
/* if the hash doesn't match */
if ( memcmp ( hash , failed [ j ] . block - > hash , BLOCK_HASH_SIZE ) ! = 0 ) {
/* we have not recovered */
break ;
}
/* pad with 0 if needed */
if ( size < state - > block_size )
memset ( block_buffer + size , 0 , state - > block_size - size ) ;
} else {
/* otherwise restore the content */
/* because we are not interested in the old state */
/* that it's recovered for CHG, REP and DELETED blocks */
memcpy ( block_buffer , block_copy , state - > block_size ) ;
}
}
/* if all is processed, we have fixed it */
if ( j = = failed_count )
fixed_error_on_this_block = 1 ;
}
}
}
/* if we have read all the data required and it's correct, proceed with the parity */
if ( ! error_on_this_block & & ! io_error_on_this_block
& & ( ! silent_error_on_this_block | | fixed_error_on_this_block )
) {
/* update the parity only if really needed */
if ( parity_needs_to_be_updated ) {
/* compute the parity */
raid_gen ( diskmax , state - > level , state - > block_size , buffer ) ;
/* until now is raid */
state_usage_raid ( state ) ;
/* mark that the parity is going to be written */
parity_going_to_be_updated = 1 ;
}
/* for each disk, mark the blocks as processed */
for ( j = 0 ; j < diskmax ; + + j ) {
struct snapraid_block * block ;
if ( ! handle [ j ] . disk )
continue ;
block = fs_par2block_find ( handle [ j ] . disk , blockcur ) ;
if ( block = = BLOCK_NULL ) {
/* nothing to do */
continue ;
}
/* if it's a deleted block */
if ( block_state_get ( block ) = = BLOCK_STATE_DELETED ) {
/* the parity is now updated without this block, so it's now empty */
fs_deallocate ( handle [ j ] . disk , blockcur ) ;
continue ;
}
/* now all the blocks have the hash and the parity computed */
block_state_set ( block , BLOCK_STATE_BLK ) ;
}
/* we update the info block only if we really have updated the parity */
/* because otherwise the time/justsynced info would be misleading as we didn't */
/* write the parity at this time */
/* we also update the info block only if no silent error was found */
/* because it makes no sense to refresh the time for data that we know is bad */
if ( parity_needs_to_be_updated
& & ! silent_error_on_this_block
) {
/* if rehash is needed */
if ( rehash ) {
/* store all the new hash already computed */
for ( j = 0 ; j < diskmax ; + + j ) {
if ( rehandle [ j ] . block )
memcpy ( rehandle [ j ] . block - > hash , rehandle [ j ] . hash , BLOCK_HASH_SIZE ) ;
}
}
/* update the time info of the block */
/* we are also clearing any previous bad and rehash flag */
info_set ( & state - > infoarr , blockcur , info_make ( now , 0 , 0 , 1 ) ) ;
}
}
/* if a silent (even if corrected) or input/output error was found */
/* mark the block as bad to have check/fix to handle it */
/* because our correction is in memory only and not yet written */
if ( silent_error_on_this_block | | io_error_on_this_block ) {
/* set the error status keeping the other info */
info_set ( & state - > infoarr , blockcur , info_set_bad ( info ) ) ;
}
/* finally schedule parity write */
/* Note that the calls to io_parity_write() are mandatory */
/* even if the parity doesn't need to be updated */
/* This because we want to keep track of the time usage */
state_usage_misc ( state ) ;
/* write start */
io_write_preset ( & io , blockcur , ! parity_going_to_be_updated ) ;
/* write the parity */
for ( l = 0 ; l < state - > level ; + + l ) {
unsigned levcur ;
io_parity_write ( & io , & levcur , waiting_map , & waiting_mac ) ;
/* until now is parity */
state_usage_parity ( state , waiting_map , waiting_mac ) ;
}
/* write finished */
io_write_next ( & io , blockcur , ! parity_going_to_be_updated , writer_error ) ;
/* handle errors reported */
for ( j = 0 ; j < IO_WRITER_ERROR_MAX ; + + j ) {
if ( writer_error [ j ] ) {
switch ( j + IO_WRITER_ERROR_BASE ) {
case TASK_STATE_IOERROR_CONTINUE :
+ + io_error ;
if ( io_error > = state - > opt . io_error_limit ) {
/* LCOV_EXCL_START */
log_fatal ( " DANGER! Unexpected input/output write error in a parity disk, it isn't possible to sync. \n " ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
goto bail ;
/* LCOV_EXCL_STOP */
}
break ;
case TASK_STATE_ERROR_CONTINUE :
+ + error ;
break ;
case TASK_STATE_IOERROR :
/* LCOV_EXCL_START */
+ + io_error ;
goto bail ;
/* LCOV_EXCL_STOP */
case TASK_STATE_ERROR :
/* LCOV_EXCL_START */
+ + error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
}
}
/* mark the state as needing write */
state - > need_write = 1 ;
/* count the number of processed block */
+ + countpos ;
/* progress */
if ( state_progress ( state , & io , blockcur , countpos , countmax , countsize ) ) {
/* LCOV_EXCL_START */
break ;
/* LCOV_EXCL_STOP */
}
/* autosave */
if ( ( state - > autosave ! = 0
& & autosavedone > = autosavelimit /* if we have reached the limit */
& & autosavemissing > = autosavelimit ) /* if we have at least a full step to do */
/* or if we have a forced autosave at the specified block */
| | ( state - > opt . force_autosave_at ! = 0 & & state - > opt . force_autosave_at = = blockcur )
) {
autosavedone = 0 ; /* restart the counter */
/* until now is misc */
state_usage_misc ( state ) ;
state_progress_stop ( state ) ;
msg_progress ( " Autosaving... \n " ) ;
/* before writing the new content file we ensure that */
/* the parity is really written flushing the disk cache */
for ( l = 0 ; l < state - > level ; + + l ) {
ret = parity_sync ( & parity_handle [ l ] ) ;
if ( ret = = - 1 ) {
/* LCOV_EXCL_START */
log_tag ( " parity_error:%u:%s: Sync error \n " , blockcur , lev_config_name ( l ) ) ;
log_fatal ( " DANGER! Unexpected sync error in %s disk. \n " , lev_name ( l ) ) ;
log_fatal ( " Ensure that disk '%s' is sane. \n " , lev_config_name ( l ) ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
+ + error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
}
/* now we can safely write the content file */
state_write ( state ) ;
state_progress_restart ( state ) ;
/* drop until now */
state_usage_waste ( state ) ;
}
}
end :
state_progress_end ( state , countpos , countmax , countsize ) ;
state_usage_print ( state ) ;
/* before returning we ensure that */
/* the parity is really written flushing the disk cache */
for ( l = 0 ; l < state - > level ; + + l ) {
ret = parity_sync ( & parity_handle [ l ] ) ;
if ( ret = = - 1 ) {
/* LCOV_EXCL_START */
log_tag ( " parity_error:%u:%s: Sync error \n " , blockcur , lev_config_name ( l ) ) ;
log_fatal ( " DANGER! Unexpected sync error in %s disk. \n " , lev_name ( l ) ) ;
log_fatal ( " Ensure that disk '%s' is sane. \n " , lev_config_name ( l ) ) ;
log_fatal ( " Stopping at block %u \n " , blockcur ) ;
+ + error ;
goto bail ;
/* LCOV_EXCL_STOP */
}
}
if ( error | | silent_error | | io_error ) {
msg_status ( " \n " ) ;
msg_status ( " %8u file errors \n " , error ) ;
msg_status ( " %8u io errors \n " , io_error ) ;
msg_status ( " %8u data errors \n " , silent_error ) ;
} else {
/* print the result only if processed something */
if ( countpos ! = 0 )
msg_status ( " Everything OK \n " ) ;
}
if ( error )
log_fatal ( " WARNING! Unexpected file errors! \n " ) ;
if ( io_error )
log_fatal ( " DANGER! Unexpected input/output errors! The failing blocks are now marked as bad! \n " ) ;
if ( silent_error )
log_fatal ( " DANGER! Unexpected data errors! The failing blocks are now marked as bad! \n " ) ;
if ( io_error | | silent_error ) {
log_fatal ( " Use 'snapraid status' to list the bad blocks. \n " ) ;
log_fatal ( " Use 'snapraid -e fix' to recover. \n " ) ;
}
log_tag ( " summary:error_file:%u \n " , error ) ;
log_tag ( " summary:error_io:%u \n " , io_error ) ;
log_tag ( " summary:error_data:%u \n " , silent_error ) ;
if ( error + silent_error + io_error = = 0 )
log_tag ( " summary:exit:ok \n " ) ;
else
log_tag ( " summary:exit:error \n " ) ;
log_flush ( ) ;
bail :
/* stop all the worker threads */
io_stop ( & io ) ;
for ( j = 0 ; j < diskmax ; + + j ) {
struct snapraid_file * file = handle [ j ] . file ;
struct snapraid_disk * disk = handle [ j ] . disk ;
ret = handle_close ( & handle [ j ] ) ;
if ( ret = = - 1 ) {
/* LCOV_EXCL_START */
log_tag ( " error:%u:%s:%s: Close error. %s \n " , blockcur , disk - > name , esc_tag ( file - > sub , esc_buffer ) , strerror ( errno ) ) ;
log_fatal ( " DANGER! Unexpected close error in a data disk. \n " ) ;
+ + error ;
/* continue, as we are already exiting */
/* LCOV_EXCL_STOP */
}
}
free ( handle ) ;
free ( zero_alloc ) ;
free ( copy_alloc ) ;
free ( copy ) ;
free ( rehandle_alloc ) ;
free ( failed ) ;
free ( failed_map ) ;
free ( waiting_map ) ;
io_done ( & io ) ;
if ( state - > opt . expect_recoverable ) {
if ( error + silent_error + io_error = = 0 )
return - 1 ;
} else {
if ( error + silent_error + io_error ! = 0 )
return - 1 ;
}
return 0 ;
}
/**
 * Run the sync over a range of parity blocks.
 *
 * The steps are, in order:
 * - check ::blockstart against the allocated parity size;
 * - create and open for writing all the parity files, verifying that they
 *   are not smaller than the parity size in use (unless a full realloc
 *   or a full computation is forced);
 * - if ::opt.prehash is set, run the hashing phase;
 * - unless the hashing phase requested to skip the sync, resize the
 *   parity files, save the content file and process the blocks;
 * - close all the parity files.
 *
 * Setup failures (bad start block, parity not accessible, parity too small,
 * parity resize failure) terminate the program with exit(EXIT_FAILURE).
 *
 * @param state Global state.
 * @param blockstart First block to process.
 * @param blockcount Number of blocks to process. If 0, or if the range goes
 *   past the end of the parity, the sync runs up to the end of the parity.
 * @return 0 on success, -1 if the hashing phase, the sync phase
 *   or the close of a parity file reported an error.
 */
int state_sync(struct snapraid_state *state, block_off_t blockstart, block_off_t blockcount)
{
	block_off_t blockmax;
	block_off_t used_paritymax;
	block_off_t file_paritymax;
	data_off_t size;
	int ret;
	struct snapraid_parity_handle parity_handle[LEV_MAX];
	unsigned unrecoverable_error;
	unsigned l;
	int skip_sync = 0;

	msg_progress(" Initializing... \n ");

	blockmax = parity_allocated_size(state);

	/* the parity size in bytes is always computed from the full allocated */
	/* size, before ::blockmax is eventually reduced by ::blockcount */
	size = blockmax * (data_off_t)state->block_size;

	/* minimum size of the parity files we expect */
	used_paritymax = parity_used_size(state);

	/* effective size of the parity files */
	file_paritymax = 0;

	if (blockstart > blockmax) {
		/* LCOV_EXCL_START */
		log_fatal(" Error in the starting block %u. It's bigger than the parity size %u. \n ", blockstart, blockmax);
		exit(EXIT_FAILURE);
		/* LCOV_EXCL_STOP */
	}

	/* adjust the number of block to process */
	/* The range is checked as "blockcount < blockmax - blockstart" and not as */
	/* "blockstart + blockcount < blockmax", because the sum may overflow with */
	/* a big ::blockcount, resulting in a ::blockmax lower than ::blockstart. */
	/* The subtraction is always safe, because the check above ensures */
	/* that ::blockstart is not bigger than ::blockmax. */
	if (blockcount != 0 && blockcount < blockmax - blockstart) {
		blockmax = blockstart + blockcount;
	}

	for (l = 0; l < state->level; ++l) {
		data_off_t out_size;
		block_off_t parityblocks;

		/* create the file and open for writing */
		ret = parity_create(&parity_handle[l], &state->parity[l], l, state->file_mode, state->block_size, state->opt.parity_limit_size);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			log_fatal(" WARNING! Without an accessible %s file, it isn't possible to sync. \n ", lev_name(l));
			exit(EXIT_FAILURE);
			/* LCOV_EXCL_STOP */
		}

		/* number of block in the parity file */
		parity_size(&parity_handle[l], &out_size);
		parityblocks = out_size / state->block_size;

		/* if the file is too small */
		if (parityblocks < used_paritymax) {
			log_fatal(" WARNING! The %s parity has data only %u blocks instead of %u. \n ", lev_name(l), parityblocks, used_paritymax);
		}

		/* keep the smallest parity number of blocks */
		if (l == 0 || file_paritymax > parityblocks)
			file_paritymax = parityblocks;
	}

	/* if we do a full parity realloc or computation, having a wrong parity size is expected */
	if (!state->opt.force_realloc && !state->opt.force_full) {
		/* if the parities are too small */
		if (file_paritymax < used_paritymax) {
			/* LCOV_EXCL_START */
			log_fatal(" DANGER! One or more the parity files are smaller than expected! \n ");
			if (file_paritymax != 0) {
				log_fatal(" If this happens because you are using an old content file, \n ");
				log_fatal(" you can 'sync' anyway using 'snapraid --force-full sync' \n ");
				log_fatal(" to force a full rebuild of the parity. \n ");
			} else {
				log_fatal(" It's possible that the parity disks are not mounted. \n ");
				log_fatal(" If instead you are adding a new parity level, you can 'sync' using \n ");
				log_fatal(" 'snapraid --force-full sync' to force a full rebuild of the parity. \n ");
			}
			exit(EXIT_FAILURE);
			/* LCOV_EXCL_STOP */
		}
	}

	unrecoverable_error = 0;

	if (state->opt.prehash) {
		msg_progress(" Hashing... \n ");

		ret = state_hash_process(state, blockstart, blockmax, &skip_sync);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			++unrecoverable_error;
			/* continue, in case also doing the sync if ::skip_sync is not set */
			/* LCOV_EXCL_STOP */
		}
	}

	if (!skip_sync) {
		msg_progress(" Resizing... \n ");

		/* now change the size of all parities */
		for (l = 0; l < state->level; ++l) {
			int is_modified;

			/* change the size of the parity file, truncating or extending it */
			/* from this point all the DELETED blocks after the end of the parity are invalid */
			/* and they are automatically removed when we save the new content file */
			ret = parity_chsize(&parity_handle[l], &state->parity[l], &is_modified, size, state->block_size, state->opt.skip_fallocate, state->opt.skip_space_holder);
			if (ret == -1) {
				/* LCOV_EXCL_START */
				data_off_t out_size;

				/* report the size reached before giving up */
				parity_size(&parity_handle[l], &out_size);
				parity_overflow(state, out_size);
				log_fatal(" WARNING! Without a usable %s file, it isn't possible to sync. \n ", lev_name(l));
				exit(EXIT_FAILURE);
				/* LCOV_EXCL_STOP */
			}

			/* a changed parity file has to be recorded in the content file */
			if (is_modified)
				state->need_write = 1;
		}

		/* after resizing parity files, refresh again the free info */
		state_refresh(state);

		/**
		 * Save the new state before the sync but after the hashing phase.
		 *
		 * This allows to recover after an aborted sync, and at the same time
		 * it allows to recover broken copied/moved files identified in the
		 * hashing phase.
		 *
		 * For example, think of this case:
		 * - Add some files to the array
		 * - Run a sync command, it will recompute the parity adding the new files
		 * - Abort the sync command before it stores the new content file
		 * - Delete the not yet synced files from the array
		 * - Run a new sync command
		 *
		 * The sync command has no way to know that the parity file was modified
		 * because the files triggering these changes are now deleted and they
		 * aren't listed in the content file.
		 * Instead, saving the new content file in advance keeps track of all
		 * the parity that may be modified.
		 */
		if (!state->opt.skip_content_write) {
			if (state->need_write)
				state_write(state);
		} else {
			log_fatal(" WARNING! Skipped state write for --test-skip-content-write option. \n ");
		}

		msg_progress(" Syncing... \n ");

		/* skip degenerated cases of empty parity, or skipping all */
		if (blockstart < blockmax) {
			ret = state_sync_process(state, parity_handle, blockstart, blockmax);
			if (ret == -1) {
				/* LCOV_EXCL_START */
				++unrecoverable_error;
				/* continue, as we are already exiting */
				/* LCOV_EXCL_STOP */
			}
		} else {
			msg_status(" Nothing to do \n ");
		}
	}

	/* the parity files are always closed, even if the sync was skipped */
	for (l = 0; l < state->level; ++l) {
		ret = parity_close(&parity_handle[l]);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			log_fatal(" DANGER! Unexpected close error in %s disk. \n ", lev_name(l));
			++unrecoverable_error;
			/* continue, as we are already exiting */
			/* LCOV_EXCL_STOP */
		}
	}

	/* abort if required */
	if (unrecoverable_error != 0)
		return -1;

	return 0;
}