//------------------------------------------------------------------------------ // Desc: Contains routines for recovering a database after // a failure. // // Tabs: 3 // // Copyright (c) 1991-2006 Novell, Inc. All Rights Reserved. // // This program is free software; you can redistribute it and/or // modify it under the terms of version 2 of the GNU General Public // License as published by the Free Software Foundation. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, contact Novell, Inc. // // To contact Novell about this file by physical or electronic mail, // you may find current contact information at www.novell.com // // $Id$ //------------------------------------------------------------------------------ #include "flaimsys.h" /**************************************************************************** Desc: This routine reads the next before-image block from the database. ****************************************************************************/ RCODE F_Db::readRollbackLog( FLMUINT uiLogEOF, // Address of end of rollback log. FLMUINT * puiCurrAddr, // This is the current address we are // reading in the log file. It // will be updated after reading the // data. F_BLK_HDR * pBlkHdr, // This is the buffer that is to hold // the data that is read from the // log file. FLMBOOL * pbIsBeforeImageBlk// Is block a before-image block? ) { RCODE rc = NE_SFLM_OK; FLMUINT uiFilePos; FLMUINT uiBlkSize = m_pDatabase->m_uiBlockSize; FLMUINT uiBytesRead; F_TMSTAMP StartTime; uiFilePos = *puiCurrAddr; // Verify that we are not going to read beyond the log EOF if (!FSAddrIsAtOrBelow( uiFilePos + uiBlkSize, uiLogEOF)) { rc = RC_SET( NE_SFLM_INCOMPLETE_LOG); goto Exit; } // Position to the appropriate place and read the data if (m_pDbStats) { m_pDbStats->bHaveStats = TRUE; m_pDbStats->LogBlockReads.ui64Count++; m_pDbStats->LogBlockReads.ui64TotalBytes += uiBlkSize; f_timeGetTimeStamp( &StartTime); } if (RC_BAD( rc = m_pSFileHdl->readBlock( uiFilePos, uiBlkSize, (FLMBYTE *)pBlkHdr, &uiBytesRead))) { if (rc == NE_FLM_IO_END_OF_FILE) { rc = RC_SET( NE_SFLM_INCOMPLETE_LOG); } if (m_pDbStats) { m_pDbStats->uiReadErrors++; } goto Exit; } if (m_pDbStats) { flmAddElapTime( &StartTime, &m_pDbStats->LogBlockReads.ui64ElapMilli); } if (uiBytesRead != uiBlkSize) { if (m_pDbStats) { m_pDbStats->uiLogBlockChkErrs++; } rc = RC_SET( NE_SFLM_DATA_ERROR); goto Exit; } if (RC_BAD( rc = flmPrepareBlockForUse( uiBlkSize, pBlkHdr))) { if (m_pDbStats && rc == NE_SFLM_BLOCK_CRC) { m_pDbStats->uiLogBlockChkErrs++; } goto Exit; } // See if before image bit is set. Need to unset it if it is. *pbIsBeforeImageBlk = (FLMBOOL)((pBlkHdr->ui8BlkFlags & BLK_IS_BEFORE_IMAGE) ? (FLMBOOL)TRUE : (FLMBOOL)FALSE); pBlkHdr->ui8BlkFlags &= ~(BLK_IS_BEFORE_IMAGE); // Adjust the current address for the next read uiFilePos += uiBlkSize; if (FSGetFileOffset( uiFilePos) >= m_pDatabase->m_uiMaxFileSize) { FLMUINT uiFileNumber = FSGetFileNumber( uiFilePos); if (!uiFileNumber) { uiFileNumber = FIRST_LOG_BLOCK_FILE_NUMBER; } else { uiFileNumber++; } if (uiFileNumber > MAX_LOG_BLOCK_FILE_NUMBER) { rc = RC_SET( NE_SFLM_DB_FULL); goto Exit; } uiFilePos = FSBlkAddress( uiFileNumber, 0 ); } *puiCurrAddr = uiFilePos; Exit: return( rc); } /**************************************************************************** Desc: This routine reads and processes a before-image block record in the log file. The reapply flag indicates whether the block should be written back to the database file. ****************************************************************************/ RCODE F_Db::processBeforeImage( FLMUINT uiLogEOF, // Address of the end of the rollback // log. FLMUINT * puiCurrAddrRV, // This is the current offset we are // reading in the log file. // It will be updated after reading the // data. F_BLK_HDR * pBlkHdr, // This is a pointer to a buffer that // will be used to hold the block that // is read. FLMBOOL bDoingRecovery, // Are we doing a recovery as opposed to // rolling back a transaction? FLMUINT64 ui64MaxTransID // Maximum transaction ID to recover to when // bDoingRecovery is TRUE. This parameter // is ignored when bDoingRecover is FALSE. ) { RCODE rc = NE_SFLM_OK; FLMUINT uiBlkAddress; FLMUINT uiBlkLength; #ifdef FLM_DBG_LOG FLMUINT64 ui64TransID; #endif FLMUINT uiBytesWritten; FLMBOOL bIsBeforeImageBlk; F_TMSTAMP StartTime; // Read the block from the log if (RC_BAD( rc = readRollbackLog( uiLogEOF, puiCurrAddrRV, pBlkHdr, &bIsBeforeImageBlk))) { goto Exit; } // Determine if we want to restore the block. // If we are doing a recovery, restore the block only if // its checkpoint is <= ui64MaxTransID. If we are // rolling back a transaction, restore the block only if // it is marked as a before-image block. // For the recovery process, multiple versions // of the same block may be restored if there are // multiple versions in the log. However, because // the versions will be in ascending order in the // file, ultimately, the one with the highest // checkpoint that does not exceed ui64MaxTransID // will be restored - which is precisely the one // we want to be restored for a recovery. // For a transaction rollback, it is impossible for us // to see more than one version of a block that is // marked as the before-image version, because we // started from a point in the log where the last // update transaction logged its first block. All // blocks after that point that have the BI bits // set should be restored. Any that do not have // the BI bit set should NOT be restored. if (bDoingRecovery) { if (pBlkHdr->ui64TransID > ui64MaxTransID) { goto Exit; } } else if (!bIsBeforeImageBlk) { goto Exit; } // Determine the block address before setting the checksum. uiBlkAddress = (FLMUINT)pBlkHdr->ui32BlkAddr; uiBlkLength = blkGetEnd( m_pDatabase->m_uiBlockSize, blkHdrSize( pBlkHdr), pBlkHdr); #ifdef FLM_DBG_LOG ui64TransID = pBlkHdr->ui64TransID; #endif if (RC_BAD( rc = flmPrepareBlockToWrite( m_pDatabase->m_uiBlockSize, pBlkHdr))) { goto Exit; } if (m_pDbStats) { m_pDbStats->bHaveStats = TRUE; m_pDbStats->LogBlockRestores.ui64Count++; m_pDbStats->LogBlockRestores.ui64TotalBytes += uiBlkLength; f_timeGetTimeStamp( &StartTime); } m_pSFileHdl->setMaxAutoExtendSize( m_pDatabase->m_uiMaxFileSize); m_pSFileHdl->setExtendSize( m_pDatabase->m_uiFileExtendSize); rc = m_pSFileHdl->writeBlock( uiBlkAddress, uiBlkLength, pBlkHdr, &uiBytesWritten); #ifdef FLM_DBG_LOG flmDbgLogWrite( m_pDatabase, uiBlkAddress, 0, ui64TransID, "ROLLBACK"); #endif if (m_pDbStats) { flmAddElapTime( &StartTime, &m_pDbStats->LogBlockRestores.ui64ElapMilli); if (RC_BAD( rc)) { m_pDbStats->uiWriteErrors++; } } Exit: return( rc); } /*************************************************************************** Desc: Writes the log header to disk. The checksum is calculated before writing the log header to disk. *****************************************************************************/ RCODE F_Database::writeDbHdr( SFLM_DB_STATS * pDbStats, F_SuperFileHdl * pSFileHdl, SFLM_DB_HDR * pDbHdr, // DB header to be written out. SFLM_DB_HDR * pCPDbHdr, // DB header as it was at the time // of the checkpoint. FLMBOOL bIsCheckpoint // Are we writing a checkpoint? If we // we are, we may write the DB header // as is. Otherwise, we need to make // sure we don't write out certain // parts of the DB header - they must // not be updated on disk until a // checkpoint actually occurs. ) { RCODE rc = NE_SFLM_OK; FLMUINT uiBytesWritten; IF_FileHdl * pCFileHdl = NULL; SFLM_DB_HDR * pTmpDbHdr; F_TMSTAMP StartTime; // Force any recent writes to disk before modifying the DB // header. This routine is generally called after having // written out data blocks or rollback blocks. It is // critial that any previous writes be flushed before the // header is updated because the header will generally have // been modified to point to the new things that were added. if (RC_BAD( rc = pSFileHdl->flush())) { goto Exit; } // No need to ever actually write the header to disk if this is // a temporary database if (m_bTempDb) { goto Exit; } pTmpDbHdr = m_pDbHdrWriteBuf; uiBytesWritten = sizeof( SFLM_DB_HDR); f_memcpy( pTmpDbHdr, pDbHdr, sizeof( SFLM_DB_HDR)); // If we are not doing a checkpoint, we don't really want // to write out certain items, so we restore them from // the database header as it was at the time of the last // checkpoint. if (!bIsCheckpoint && pCPDbHdr) { pTmpDbHdr->ui32RflLastCPFileNum = pCPDbHdr->ui32RflLastCPFileNum; pTmpDbHdr->ui32RflLastCPOffset = pCPDbHdr->ui32RflLastCPOffset; pTmpDbHdr->ui64CurrTransID = pCPDbHdr->ui64CurrTransID; pTmpDbHdr->ui64TransCommitCnt = pCPDbHdr->ui64TransCommitCnt; pTmpDbHdr->ui32FirstAvailBlkAddr = pCPDbHdr->ui32FirstAvailBlkAddr; pTmpDbHdr->ui32LogicalEOF = pCPDbHdr->ui32LogicalEOF; pTmpDbHdr->ui32BlksChangedSinceBackup = pCPDbHdr->ui32BlksChangedSinceBackup; pTmpDbHdr->ui64LastRflCommitID = pCPDbHdr->ui64LastRflCommitID; } // Header is always written out in native format. Set the CRC flmAssert( !hdrIsNonNativeFormat( pTmpDbHdr)); pTmpDbHdr->ui32HdrCRC = calcDbHdrCRC( pTmpDbHdr); // Now update the log header record on disk. if (pDbStats) { pDbStats->bHaveStats = TRUE; pDbStats->DbHdrWrites.ui64Count++; pDbStats->DbHdrWrites.ui64TotalBytes += uiBytesWritten; f_timeGetTimeStamp( &StartTime); } if( RC_BAD( rc = pSFileHdl->getFileHdl( 0, TRUE, &pCFileHdl))) { goto Exit; } if( RC_BAD( rc = pCFileHdl->write( 0, uiBytesWritten, pTmpDbHdr, &uiBytesWritten))) { if (pDbStats) { pDbStats->uiWriteErrors++; } goto Exit; } if (pDbStats) { flmAddElapTime( &StartTime, &pDbStats->DbHdrWrites.ui64ElapMilli); } // Finally, force the header to disk. if (RC_BAD( rc = pCFileHdl->flush())) { goto Exit; } Exit: return( rc); } /**************************************************************************** Desc: This routine recovers the database to a physically consistent state. Ret: NE_SFLM_OK - Indicates the database has been recovered. other - other FLAIM error codes ****************************************************************************/ RCODE F_Db::physRollback( FLMUINT uiLogEOF, FLMUINT uiFirstLogBlkAddr, // Address of first log block FLMBOOL bDoingRecovery, // Doing recovery? If so, we will // ignore blocks whose transaction // ID is higher than ui64MaxTransID. // Also, we will not check the BI // bits in the logged blocks, because // we are not rolling back a // transaction. FLMUINT64 ui64MaxTransID // Ignored when bDoingRecovery is // FALSE ) { RCODE rc = NE_SFLM_OK; FLMUINT uiCurrAddr; FLMBYTE * pucBlk = NULL; // If the log is empty, no need to do anything. // A uiFirstLogBlkAddr of zero indicates that there // is nothing in the log to rollback. This will be true // if we are rolling back a transaction, and the transaction // has not logged anything or if we are doing a recovery and // nothing was logged since the last checkpoint. if (uiLogEOF == m_pDatabase->m_uiBlockSize || !uiFirstLogBlkAddr) { goto Exit; // Will return NE_SFLM_OK } // Allocate a buffer to be used for reading. if( RC_BAD( rc = f_allocAlignedBuffer( m_pDatabase->m_uiBlockSize, (void **)&pucBlk))) { goto Exit; } // Start from beginning of log and read to EOF restoring before-image // blocks along the way. uiCurrAddr = uiFirstLogBlkAddr; while (FSAddrIsBelow( uiCurrAddr, uiLogEOF)) { if (RC_BAD( rc = processBeforeImage( uiLogEOF, &uiCurrAddr, (F_BLK_HDR *)pucBlk, bDoingRecovery, ui64MaxTransID))) { goto Exit; } } // Force the writes to the file. if (RC_BAD( rc = m_pSFileHdl->flush())) { goto Exit; } Exit: // Free the memory handle, if one was allocated. if (pucBlk) { f_freeAlignedBuffer( (void **)&pucBlk); } return( rc); }