Files
mars-nwe/src/nwnss/zlss/zfsFileMap.c
2026-06-19 11:11:42 +02:00

6264 lines
168 KiB
C

/****************************************************************************
|
| (C) Copyright 1995-1997 Novell, Inc.
| All Rights Reserved.
|
| This program is free software; you can redistribute it and/or
| modify it under the terms of version 2 of the GNU General Public
| License as published by the Free Software Foundation.
|
| This program is distributed in the hope that it will be useful,
| but WITHOUT ANY WARRANTY; without even the implied warranty of
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
| GNU General Public License for more details.
|
| You should have received a copy of the GNU General Public License
| along with this program; if not, contact Novell, Inc.
|
| To contact Novell about this file by physical or electronic mail,
| you may find current contact information at www.novell.com
|
|***************************************************************************
|
| NetWare Advance File Services (NSS) module
|
|---------------------------------------------------------------------------
|
| $Author: gpachner $
| $Date: 2007-05-08 05:01:53 +0530 (Tue, 08 May 2007) $
|
| $RCSfile$
| $Revision: 1979 $
|
|---------------------------------------------------------------------------
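| This module implements the ZFS file map (see zfsFileMap.h): it looks up
| file blocks in the direct extent map and in the file-map B-tree, and it
| grows and splits the B-tree nodes as extents are added.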
+-------------------------------------------------------------------------*/
#if defined(NSS_USERSPACE)
#include <internal/NssPageRuntime.h>
struct block_device;
#else
#include <linux/highmem.h>
#include <linux/blkdev.h>
#endif
#include <guid.h>
#include <stdio.h>
#include <stdlib.h>
#include <xError.h>
#include <xCache.h>
#include <inst.h>
#include <stdint.h>
#include "msgIO.h"
#include "zfs.h"
#include "comnBeasts.h"
#include "comnBeastClass.h"
#include "comnAuthorize.h"
#include "zParams.h"
#include "comnPublics.h"
#include "pssmpk.h"
#include "fileHandle.h"
//#include "adminVolume.h"
#include "nameSpace.h"
#include "zfsAsyncio.h"
#include "zlog.h"
#include "zlssStartup.h"
#include "zfsXTree.h"
#include "zfsFileMap.h"
#include "userTree.h"
#include "dirQuotas.h"
/*
 * User-space build shims (NSS_USERSPACE).
 *
 * GFP_USER, READ and WRITE get fallback values only when no included header
 * has defined them (presumably they come from the Linux kernel headers in
 * the non-user-space build -- confirm).  ClearStatus and SetStatusFromErrno
 * are redefined unconditionally so that the "where" value is converted
 * through uintptr_t before being widened to QUAD.
 */
#if defined(NSS_USERSPACE)
#ifndef GFP_USER
#define GFP_USER 0
#endif
#ifndef READ
#define READ 0
#endif
#ifndef WRITE
#define WRITE 1
#endif
/* Set the message status to zOK and record WHERE as its location. */
#undef ClearStatus
#define ClearStatus(_msg) \
do { (_msg)->sys.status = zOK; (_msg)->sys.where = (QUAD)(uintptr_t)WHERE; } while (0)
/* Copy the errno of _genMsg, and the location that set it, into _msg. */
#undef SetStatusFromErrno
#define SetStatusFromErrno(_msg, _genMsg) \
do { (_msg)->sys.status = GetErrno((_genMsg)); \
(_msg)->sys.where = (QUAD)(uintptr_t)((_genMsg)->errStatusSetter); } while (0)
#endif
/**************************************************************************
* read a block that belongs to the btree
**************************************************************************/
/*
 * ZFS_Return: resume the finite state machine embedded in this file-map
 * I/O request (zio->io.fsm) by handing it to FSM_RUN.  In this file it is
 * used as the final "action" of a lookup, both directly and as the
 * continuation passed to asyncReadVolBlk.
 */
void ZFS_Return (ZioFmap_s *zio)
{
ASSERT_MPKNSS_LOCK();
FSM_RUN( &zio->io.fsm);
}
/*
 * readVolBlkBuffer: FSM step that asyncReadVolBlk pushes so that it runs
 * after the buffer allocation/read step.
 *
 * DOWN_LATCH is applied to the buffer's latch first.  If the request
 * finished with a status other than zOK, that status is stored in the
 * buffer's agent, the buffer is released and aio->buffer is cleared so
 * later steps see NULL.  The FSM is then run in both cases.
 */
void readVolBlkBuffer(Asyncio_s *aio)
{
	ASSERT_MPKNSS_LOCK();
	DOWN_LATCH(&aio->buffer->agent.latch);

	if (aio->status == zOK)
	{
		/* Successful read: keep the buffer and continue. */
		FSM_RUN(&aio->fsm);
		return;
	}

	/* Failed read: propagate the status and drop the buffer. */
	aio->buffer->agent.status = aio->status;
	CACHE_RELEASE(aio->buffer);
	aio->buffer = NULL;
	FSM_RUN(&aio->fsm);
}
/*
 * asyncReadVolBlk: start an asynchronous read of pool block aio->volBlk.
 *
 * aio     request describing the block (volBlk, fileBlk, mycache, fsm).
 * action  FSM step to run once the block is available.
 *
 * When isCached() reports true nothing more is done here (presumably
 * isCached arranges for `action` itself -- confirm).  Otherwise `action`
 * and then readVolBlkBuffer are pushed on the request's FSM and a cache
 * buffer is requested with ZFSMAL_asyncReadBlk as the read routine.  The
 * user-data allocator is used only when fileBlk is non-negative, volBlk is
 * non-zero and the owning beast's zid is at least zFIRST_ALLOCATABLE_ZID;
 * every other case uses the plain allocator.
 */
void asyncReadVolBlk(Asyncio_s *aio, voidfunc_t action)
{
	RootBeast_s *owner;

	ASSERT_MPKNSS_LOCK();
	zASSERT(aio->volBlk > 0);

	if (isCached(aio, action))
	{
		return;
	}

	FSM_PUSH(&aio->fsm, action);
	FSM_PUSH(&aio->fsm, readVolBlkBuffer);

	owner = STRUCT(aio->mycache, RootBeast_s, ROOTmycache);
	if ((aio->fileBlk >= 0) && (aio->volBlk != 0) &&
		(owner->ROOTzid >= zFIRST_ALLOCATABLE_ZID))
	{
		asyncCacheAllocBufferForUserData(aio, ZFSMAL_asyncReadBlk,
					ZFS_BlockSignalHandler);
	}
	else
	{
		asyncCacheAllocBuffer(aio, ZFSMAL_asyncReadBlk,
					ZFS_BlockSignalHandler);
	}
}
/****************************************************************************
* Search the Direct File Map
*****************************************************************************/
/*
 * searchDirectMap: look up a file block in the direct extent table.
 *
 * Record k (k >= 1) covers file blocks [dirExt[k-1].count, dirExt[k].count);
 * record 0 only supplies the starting count.
 *
 * fmap     file map whose dirExt[] table is searched.
 * fileBlk  file block to find.
 * seed     out, written only when no record covers fileBlk: the pool block
 *          just past the end of the last record.
 * length   out, written only on a hit: blocks from fileBlk to the end of
 *          the matching record.
 * index    out, written only on a hit: index of the matching record.
 *
 * Returns the pool block for fileBlk, or 0 when the table has at most one
 * record or no record covers fileBlk.
 */
Blknum_t searchDirectMap(
	Fmap_s *fmap,
	Blknum_t fileBlk,
	Blknum_t *seed,
	Blknum_t *length,
	NINT *index)
{
	NINT slot = 1;

	ASSERT_MPKNSS_LOCK();
	if (fmap->numRecs <= 1)
	{
		return 0;
	}

	while (slot < fmap->numRecs)
	{
		if (fileBlk < fmap->dirExt[slot].count)
		{
			*index = slot;
			*length = fmap->dirExt[slot].count - fileBlk;
			return (fmap->dirExt[slot].poolBlk +
					(fileBlk - fmap->dirExt[slot - 1].count));
		}
		slot++;
	}

	/* Not covered: slot == numRecs here, so slot-1 is the last record. */
	*seed = fmap->dirExt[slot - 1].poolBlk + fmap->dirExt[slot - 1].count
			- fmap->dirExt[slot - 2].count;
	return 0;
}
/****************************************************************************
* searchLeaf and searchBranch:
* Search the leaf and branch nodes of a btree
*****************************************************************************/
/*
 * searchLeaf: binary-search a leaf node of the file-map B-tree.
 *
 * Record i (i >= 1) covers file blocks [extent[i-1].count, extent[i].count);
 * extent[0] only supplies the first file block covered by the node.
 *
 * node     leaf node to search.
 * fileBlk  file block to find.
 * length   out: blocks from fileBlk to the end of the matching record, or
 *          0 when fileBlk is not covered by this node.
 * index    out: index of the matching record, or head.numRecs when fileBlk
 *          is not covered by this node.
 *
 * Returns the pool block holding fileBlk, or 0 when the matching record is
 * sparse (poolBlk == 0) or fileBlk is not covered by the node.
 */
Blknum_t searchLeaf(
	FmapNode_s *node,
	Blknum_t fileBlk,
	Blknum_t *length,
	NINT *index)
{
	Blknum_t numRecs;
	NINT offset;
	NINT i;
#if NSS_DEBUG IS_ENABLED
	NINT testCounter = 0;
#endif
	ASSERT_MPKNSS_LOCK();
	offset = 0;
	numRecs = node->head.numRecs;
	/** Make sure we don't get stuck here forever in case there is a
	 ** corrupted node.  The search below only terminates when
	 ** extent[0].count <= fileBlk < extent[numRecs - 1].count: an empty
	 ** node or a fileBlk below the first record would index extent[-1]
	 ** and, in non-debug builds, loop without making progress.
	 **/
	if ((numRecs < 1) ||
		(fileBlk < node->extent[0].count) ||
		(fileBlk >= node->extent[numRecs - 1].count))
	{
		*index = numRecs;
		*length = 0;
		return 0;
	}
	while (1)
	{
#if NSS_DEBUG IS_ENABLED
		testCounter++;
		zASSERT(numRecs);
#endif
		/* Probe the middle of the window [offset, offset + numRecs). */
		numRecs = numRecs >> 1;
		if (fileBlk >= node->extent[offset + numRecs].count)
		{
			if (fileBlk < node->extent[offset + numRecs + 1].count)
			{
				i = offset + numRecs + 1;
				break;
			}
			else
			{
				/* Target lies further right: restart from the probe. */
				offset += numRecs;
				numRecs = node->head.numRecs - offset;
			}
		}
		else
		{
			if (fileBlk >= node->extent[offset + numRecs - 1].count)
			{
				i = offset + numRecs;
				break;
			}
			/* Target lies further left: keep the halved window. */
		}
#if NSS_DEBUG IS_ENABLED
		zASSERT(testCounter < 100);
#endif
	}
	*index = i;
	*length = node->extent[i].count - fileBlk;
	if (node->extent[i].poolBlk == 0)
		return 0;
	else
		return (node->extent[i].poolBlk +
				(fileBlk - node->extent[i-1].count));
}
/*
 * searchBranch: binary-search a branch node of the file-map B-tree.
 *
 * Record i (i >= 1) covers file blocks [extent[i-1].count, extent[i].count)
 * and its poolBlk is the block of the child node for that range;
 * extent[0] only supplies the first file block covered by the node.
 *
 * node     branch node to search.
 * fileBlk  file block to find.
 * index    out: index of the matching record, or head.numRecs when fileBlk
 *          is not covered by this node.
 *
 * Returns the pool block of the child to descend into, or 0 when fileBlk
 * is not covered by this node, which callers should treat as a corrupted
 * node.
 */
Blknum_t searchBranch(FmapNode_s *node, Blknum_t fileBlk, NINT *index)
{
	Blknum_t numRecs;
	NINT offset;
#if NSS_DEBUG IS_ENABLED
	NINT testCounter = 0;
#endif
	ASSERT_MPKNSS_LOCK();
	offset = 0;
	numRecs = node->head.numRecs;
	/** Make sure we don't get stuck here forever in case there is a
	 ** corrupted node.  The search below only terminates when
	 ** extent[0].count <= fileBlk < extent[numRecs - 1].count: an empty
	 ** node or a fileBlk below the first record would index extent[-1]
	 ** and, in non-debug builds, loop without making progress.
	 **/
	if ((numRecs < 1) ||
		(fileBlk < node->extent[0].count) ||
		(fileBlk >= node->extent[numRecs - 1].count))
	{
		zASSERT(0);
		*index = numRecs;
		return 0;
	}
	while (1)
	{
#if NSS_DEBUG IS_ENABLED
		testCounter++;
		zASSERT(numRecs);
#endif
		/* Probe the middle of the window [offset, offset + numRecs). */
		numRecs = numRecs >> 1;
		if (fileBlk >= node->extent[offset + numRecs].count)
		{
			if (fileBlk < node->extent[offset + numRecs + 1].count)
			{
				*index = offset + numRecs + 1;
				return (node->extent[offset + numRecs + 1].poolBlk);
			}
			else
			{
				/* Target lies further right: restart from the probe. */
				offset += numRecs;
				numRecs = node->head.numRecs - offset;
			}
		}
		else
		{
			if (fileBlk >= node->extent[offset + numRecs - 1].count)
			{
				*index = offset + numRecs;
				return (node->extent[offset + numRecs].poolBlk);
			}
			/* Target lies further left: keep the halved window. */
		}
#if NSS_DEBUG IS_ENABLED
		zASSERT(testCounter < 100);
#endif
	}
}
/*************************************************************************
* findBlk in Fmap: (use fsms to do this )
* Search the filemap recursively (if it is a branch) until we find
* the entry we are looking for.
* Find the fileBlk in the Fmap
*************************************************************************/
/*
 * asyncFindBlkInFmap: FSM step that walks the file-map B-tree one node per
 * invocation, looking for file block zio->fmap.saveFileBlk.
 *
 * On entry zio->io.buffer holds the tree node that was just read, or NULL
 * if that read failed.
 *   - Read failure: return through ZFS_Return with buffer still NULL.
 *   - Node magic does not match its state bits: set io.status to
 *     zERR_MEDIA_CORRUPTED, drop the buffer and return through ZFS_Return.
 *   - Leaf: resolve the file block.  A result of 0 (sparse or uncovered
 *     range) hands back the shared CACHE_SparseBuffer; otherwise the data
 *     block is read with ZFS_Return as the continuation.
 *   - Branch: read the child node and re-enter this function.  A child
 *     block of 0 is searchBranch's "not covered" answer and is reported as
 *     zERR_MEDIA_CORRUPTED instead of being read as pool block 0.
 */
void asyncFindBlkInFmap(ZioFmap_s *zio)
{
	FmapNode_s *node;
	Blknum_t fileBlk = zio->fmap.saveFileBlk;
	Blknum_t len;
	NINT index;
	ASSERT_MPKNSS_LOCK();
	if (zio->io.buffer == NULL)
	{
		/** There was a READ error **/
		ZFS_Return(zio);
		return;
	}
	node = (FmapNode_s *)(zio->io.buffer->pBuf.data);
	if (node->head.state & BT_LEAF)
	{
		zASSERT((node->head.state & BT_ROOT) ?
				(node->head.magic == FMAP_BT_ROOT) :
				(node->head.magic == FMAP_BT_LEAF));
		if ((node->head.state & BT_ROOT) ?
				(node->head.magic != FMAP_BT_ROOT) :
				(node->head.magic != FMAP_BT_LEAF))
		{
			zio->io.status = zERR_MEDIA_CORRUPTED;
			CACHE_RELEASE(zio->io.buffer);
			zio->io.buffer = NULL;
			ZFS_Return(zio);
			return;
		}
		zio->io.volBlk = searchLeaf(node, fileBlk, &len, &index);
		zASSERT(zio->io.volBlk >= 0);
		zio->io.fileBlk = fileBlk;
		CACHE_RELEASE(zio->io.buffer);
		zio->io.buffer = NULL;
		if (zio->io.volBlk == 0)
		{
			/** No pool block behind this file block: return zeroes **/
			ASSERT_SLATCH( &CACHE_SparseBuffer.agent.latch);
			ADD_LATCH( &CACHE_SparseBuffer.agent.latch);
			zio->io.buffer = &CACHE_SparseBuffer;
			ZFS_Return(zio);
			return;
		}
		asyncReadVolBlk(&zio->io, ZFS_Return);
		return;
	}
	zASSERT((node->head.state & BT_ROOT) ?
			(node->head.magic == FMAP_BT_ROOT) :
			(node->head.magic == FMAP_BT_BRANCH));
	if ((node->head.state & BT_ROOT) ?
			(node->head.magic != FMAP_BT_ROOT) :
			(node->head.magic != FMAP_BT_BRANCH))
	{
		zio->io.status = zERR_MEDIA_CORRUPTED;
		CACHE_RELEASE(zio->io.buffer);
		zio->io.buffer = NULL;
		ZFS_Return(zio);
		return;
	}
	zio->io.volBlk = searchBranch(node, fileBlk, &index);
	zASSERT(zio->io.volBlk > 0);
	CACHE_RELEASE(zio->io.buffer);
	zio->io.buffer = NULL;
	if (zio->io.volBlk <= 0)
	{
		/** The branch does not cover fileBlk: the node is corrupted.
		 ** Fail the request rather than read pool block 0 as a node.
		 **/
		zio->io.status = zERR_MEDIA_CORRUPTED;
		ZFS_Return(zio);
		return;
	}
	zio->io.fileBlk = POOLBLK_TO_INDIRECT(zio->io.volBlk);
	asyncReadVolBlk(&zio->io, asyncFindBlkInFmap);
	return;
}
/*************************************************************************
* ZFSVOL_VOL_asyncReadFileBlk
*************************************************************************/
/*
 * ZFSVOL_VOL_asyncReadFileBlk: start an asynchronous read of file block
 * asyncio->fileBlk of the beast that owns asyncio->mycache.
 *
 * Three cases, tested in this order:
 *   1. fileBlk is at or past the storage's nextBlk: hand back the shared
 *      CACHE_SparseBuffer (a block of zeroes) and return via ZFS_Return.
 *   2. fileBlk is at or past the end of the direct extent table: save the
 *      target in zio->fmap.saveFileBlk and read the B-tree root, with
 *      asyncFindBlkInFmap as the continuation.
 *   3. Otherwise: resolve the pool block from the direct table and read it
 *      with ZFS_Return as the continuation.
 */
void ZFSVOL_VOL_asyncReadFileBlk(Asyncio_s *asyncio)
{
	ZioFmap_s *zio = STRUCT(asyncio, ZioFmap_s, io);
	RootBeast_s *beast;
	ZFSStorageInfo_s *storage;
	Fmap_s *fmap;
	Blknum_t seedHint = 0;		/* written by searchDirectMap, unused here */
	Blknum_t runLen = 0;		/* written by searchDirectMap, unused here */
	NINT slot = 0;				/* written by searchDirectMap, unused here */

	ASSERT_MPKNSS_LOCK();

	/* Locate the beast for this I/O */
	beast = STRUCT(zio->io.mycache, RootBeast_s, ROOTmycache);
	storage = beast->storage.zfsInfo;
	fmap = &storage->fmap;
	zASSERT(zio->io.mode == CACHE_READ);
	ASSERT_LATCH(&beast->ROOTbeastLatch);
	zio->io.buffer = NULL;

	if (storage->nextBlk <= zio->io.fileBlk)
	{
		/** Beyond the last block of the file: return a block of zeroes **/
		ASSERT_SLATCH( &CACHE_SparseBuffer.agent.latch);
		ADD_LATCH( &CACHE_SparseBuffer.agent.latch);
		zio->io.buffer = &CACHE_SparseBuffer;
		ZFS_Return(zio);
		return;
	}

	if (fmap->dirExt[fmap->numRecs - 1].count <= zio->io.fileBlk)
	{
		/** The fileBlk is in the Btree portion of the filemap **/
		zASSERT(fmap->root != INVALID_BLK_ZERO);
		zio->fmap.saveFileBlk = zio->io.fileBlk;
		zio->io.volBlk = fmap->root;
		zio->io.fileBlk = POOLBLK_TO_INDIRECT(zio->io.volBlk);
		asyncReadVolBlk(&zio->io, asyncFindBlkInFmap);
		return;
	}

	/** The fileBlk is in the direct portion of the filemap **/
	zio->io.volBlk = searchDirectMap(fmap, zio->io.fileBlk, &seedHint,
										&runLen, &slot);
	zASSERT(zio->io.volBlk > 0);
	asyncReadVolBlk(&zio->io, ZFS_Return);
}
/*************************************************************************
*************************************************************************/
/*************************************************************************
* findBlkInFileMap
*************************************************************************/
/*
 * findBlkInFileMap: synchronously translate a file block into a pool block.
 *
 * genMsg   receives the error code on failure.
 * stInfo   storage info whose file map is searched.
 * fileBlk  file block to translate.
 * poolBlk  out: pool block holding fileBlk; 0 when the leaf record for it
 *          is sparse or does not cover it.
 *
 * The direct extent table is used when it covers fileBlk.  Otherwise the
 * B-tree is walked from fmap->root down to a leaf, reading one node at a
 * time with ZFS_ReadPoolBlk.
 *
 * Returns zOK on success.  Returns zFAILURE when a node cannot be read, or
 * with zERR_MEDIA_CORRUPTED set when a node has the wrong magic or a branch
 * node does not cover fileBlk (searchBranch returned 0).
 */
STATUS findBlkInFileMap(
	GeneralMsg_s *genMsg,
	ZFSStorageInfo_s *stInfo,
	Blknum_t fileBlk,
	Blknum_t *poolBlk)
{
	Fmap_s *fmap = &stInfo->fmap;
	Buffer_s *buf = NULL;
	FmapNode_s *node;
	NINT index;		/** unused **/
	Blknum_t len;	/** unused **/
	Blknum_t seed;	/** unused **/
	IoMsg_s iomsg;
	ASSERT_MPKNSS_LOCK();
	if (fmap->dirExt[fmap->numRecs - 1].count > fileBlk)
	{
		*poolBlk = searchDirectMap(fmap, fileBlk, &seed, &len, &index);
		zASSERT(*poolBlk > 0);
		return zOK;
	}
	READBLK_IO_MSG(iomsg, stInfo->comnInfo.beast, fmap->root, CACHE_READ);
	SET_DEBUG_ID(iomsg, 15);
	if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
	{
		return zFAILURE;
	}
	node = (FmapNode_s *)buf->pBuf.data;
	zASSERT(node->head.magic == FMAP_BT_ROOT);
	if (node->head.magic != FMAP_BT_ROOT)
	{
		SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
		ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
		CACHE_RELEASE(buf);
		buf = NULL;
		return zFAILURE;
	}
	while (!(node->head.state & BT_LEAF))
	{
		*poolBlk = searchBranch(node, fileBlk, &index);
		if (*poolBlk <= 0)
		{
			/** The branch does not cover fileBlk: the node is corrupted.
			 ** Report it against the branch block (iomsg still describes
			 ** it) rather than read pool block 0 as a node.
			 **/
			SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
			ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
			CACHE_RELEASE(buf);
			buf = NULL;
			return zFAILURE;
		}
		CACHE_RELEASE(buf);
		buf = NULL;
		READBLK_IO_MSG(iomsg, stInfo->comnInfo.beast, *poolBlk, CACHE_READ);
		SET_DEBUG_ID(iomsg, 16);
		if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
		{
			return zFAILURE;
		}
		node = (FmapNode_s *)buf->pBuf.data;
		zASSERT((node->head.magic == FMAP_BT_LEAF) ||
				(node->head.magic == FMAP_BT_BRANCH));
		if ((node->head.magic != FMAP_BT_LEAF) &&
			(node->head.magic != FMAP_BT_BRANCH))
		{
			SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
			ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
			CACHE_RELEASE(buf);
			buf = NULL;
			return zFAILURE;
		}
	}
	*poolBlk = searchLeaf(node, fileBlk, &len, &index);
	CACHE_RELEASE(buf);
	zASSERT(*poolBlk >= 0);
	return zOK;
}
/*
 * syncGrowBtree: add a new root above the current root of the file-map
 * B-tree.
 *
 * bufChild holds the node that is the root on entry; it loses BT_ROOT and
 * gets the leaf or branch magic matching its BT_LEAF bit.  bufParent is
 * initialized as the new root (state BT_ROOT only, so it is a branch) with
 * two records: record 0 copies the child's record 0, and record 1 is
 * {MAX_FILE_BLK, bufChild->volBlk}.
 *
 * xaction      transaction to log under; when NULL the nodes are updated
 *              but nothing is logged or bound.
 * bufChild     buffer of the node being demoted.
 * bufParent    buffer that becomes the new root.
 * indexParent  out: index of the parent record pointing at the child
 *              (always 1 here).
 * beast        supplies the internal ID and zid stamped into both headers.
 *
 * This function does not update the file map's root pointer; the caller
 * does that (updateSparseLeaf sets fmap->root after the call).
 */
void syncGrowBtree(
ZfsXaction_s *xaction,
Buffer_s *bufChild,
Buffer_s *bufParent,
NINT *indexParent,
RootBeast_s *beast)
{
FmapNode_s *parent;
FmapNode_s *child;
ZfsXasRecovery_s *logBuffer;
BlockInfo_s *logBlks;
FmapLog_s *logRecord;
ASSERT_MPKNSS_LOCK();
child = (FmapNode_s *)bufChild->pBuf.data;
parent = (FmapNode_s *)bufParent->pBuf.data;
/* Demote the old root and give it the magic that matches its kind. */
child->head.state &= ~BT_ROOT;
if (child->head.state & BT_LEAF)
child->head.magic = FMAP_BT_LEAF;
else
child->head.magic = FMAP_BT_BRANCH;
child->head.fnh_internalID = beast->ROOTinternalID;
parent->head.fnh_internalID = beast->ROOTinternalID;
child->head.fnh_zid = beast->zid;
parent->head.fnh_zid = beast->zid;
/* Build the new root header. */
parent->head.magic = FMAP_BT_ROOT;
parent->head.state = BT_ROOT;
parent->head.leafLink = INVALID_BLK_ZERO;
parent->head.lsn = 0;
/* Record 0 is the base record, copied from the child. */
parent->extent[0].count = child->extent[0].count;
parent->extent[0].poolBlk = child->extent[0].poolBlk;
parent->head.numRecs = 1;
zASSERT(bufChild->volBlk != 0);
/* Record 1 routes everything up to MAX_FILE_BLK to the child. */
parent->extent[parent->head.numRecs].count = MAX_FILE_BLK;
parent->extent[parent->head.numRecs].poolBlk = bufChild->volBlk;
parent->head.numRecs++;
*indexParent = parent->head.numRecs -1;
if (xaction == NULL)
{
return;
}
/* Log the grow: block 0 is the child, block 1 the newly allocated parent. */
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(2) + sizeof(FmapGrow_s) );
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_GROW, xaction, logBuffer,
2, logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], bufChild->volBlk,
child->head.lsn, bufChild, xaction, 0);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[1], bufParent->volBlk,
parent->head.lsn, bufParent, xaction, 1);
ZLOG_ALLOC_BLOCK(logBlks[1]);
memcpy(&logRecord->u.grow.extent[0], &parent->extent[0],
sizeof(logRecord->u.grow.extent));
// Either Child or Parent is ok for next 2 lines as ID, ZID are the same
logRecord->u.grow.fg_internalID = child->head.fnh_internalID;
logRecord->u.grow.fg_zid = child->head.fnh_zid;
/* Stamp both nodes with the LSN of this log record, after the old LSNs
 * were handed to ZLOG_ASSIGN_BLOCK_INFO above. */
child->head.lsn = logBuffer->ZXR_Lsn;
parent->head.lsn = logBuffer->ZXR_Lsn;
ZLOG_BIND(xaction, bufChild);
ZLOG_BIND(xaction, bufParent);
ZLOG_ReleaseRecord(xaction);
return;
}
/*
 * syncSplitBtree: add a new right-hand sibling next to bufChild and hook
 * it into bufParent as the parent's new last record.
 *
 * The sibling inherits magic, internal ID, zid and state from the child.
 * The parent's current last record is shortened to end where the sibling
 * starts, and a new last record {MAX_FILE_BLK, bufSibling->volBlk} is
 * appended.
 *
 *   Leaf child:   the child's leafLink is pointed at the sibling.  The
 *                 sibling gets a base record copied from the child's last
 *                 record, followed by one record for `extent`
 *                 (lengthOfExtent blocks at poolBlkNum).  The child keeps
 *                 all of its records.
 *   Branch child: the child's last record moves to the sibling, with the
 *                 child's second-to-last record copied as the sibling's
 *                 base record.  `extent` is not used in this case.
 *
 * xaction     transaction to log under; when NULL nothing is logged.
 * bufChild    node being split (assumed to be the parent's last child,
 *             since the parent's last record is the one rewritten --
 *             confirm against callers).
 * bufSibling  buffer for the new node.
 * bufParent   parent of bufChild.
 * extent      extent to place in a new leaf sibling.
 *
 * No capacity check is made on the parent here.
 */
void syncSplitBtree(
ZfsXaction_s *xaction,
Buffer_s *bufChild,
Buffer_s *bufSibling,
Buffer_s *bufParent,
Extent_s *extent)
{
FmapNode_s *parent;
FmapNode_s *child;
FmapNode_s *sibling;
ZfsXasRecovery_s *logBuffer;
BlockInfo_s *logBlks;
FmapLog_s *logRecord;
ASSERT_MPKNSS_LOCK();
child = (FmapNode_s *)bufChild->pBuf.data;
parent = (FmapNode_s *)bufParent->pBuf.data;
sibling = (FmapNode_s *)bufSibling->pBuf.data;
/* The sibling is the same kind of node as the child. */
sibling->head.magic = child->head.magic;
sibling->head.fnh_internalID = child->head.fnh_internalID;
sibling->head.fnh_zid = child->head.fnh_zid;
sibling->head.state = child->head.state;
sibling->head.leafLink = INVALID_BLK_ZERO;
sibling->head.lsn = 0;
zASSERT(bufSibling->volBlk != 0);
if (child->head.state & BT_LEAF)
{
child->head.leafLink = bufSibling->volBlk;
/* Parent: old last record now ends at the child's end; new last record
 * routes the remainder to the sibling. */
parent->extent[parent->head.numRecs - 1].count =
child->extent[child->head.numRecs - 1].count;
parent->extent[parent->head.numRecs].count = MAX_FILE_BLK;
parent->extent[parent->head.numRecs].poolBlk = bufSibling->volBlk;
parent->head.numRecs++;
/* Sibling record 0: base record, copy of the child's last record. */
sibling->extent[0].count =
child->extent[child->head.numRecs - 1].count;
sibling->extent[0].poolBlk =
child->extent[child->head.numRecs - 1].poolBlk;
sibling->head.numRecs = 1;
/* Sibling record 1: the new extent, starting at the base count. */
sibling->extent[sibling->head.numRecs].count =
sibling->extent[sibling->head.numRecs - 1].count +
extent->lengthOfExtent;
sibling->extent[sibling->head.numRecs].poolBlk = extent->poolBlkNum;
sibling->head.numRecs++;
}
else
{
/* Parent: old last record now ends where the child's second-to-last
 * record ends; new last record routes the remainder to the sibling. */
parent->extent[parent->head.numRecs - 1].count =
child->extent[child->head.numRecs - 2].count;
parent->extent[parent->head.numRecs].count = MAX_FILE_BLK;
parent->extent[parent->head.numRecs].poolBlk = bufSibling->volBlk;
parent->head.numRecs++;
/* Sibling record 0: base record, copy of the child's second-to-last. */
sibling->extent[0].count =
child->extent[child->head.numRecs - 2].count;
sibling->extent[0].poolBlk =
child->extent[child->head.numRecs - 2].poolBlk;
sibling->head.numRecs = 1;
/* Sibling record 1: the child's last record, moved over. */
sibling->extent[sibling->head.numRecs].count =
child->extent[child->head.numRecs - 1].count;
sibling->extent[sibling->head.numRecs].poolBlk =
child->extent[child->head.numRecs - 1].poolBlk;
sibling->head.numRecs++;
/* Remove the moved record from the child. */
child->extent[child->head.numRecs - 1].count = 0;
child->extent[child->head.numRecs - 1].poolBlk = 0;
child->head.numRecs--;
}
if (xaction == NULL)
{
return;
}
/* Log record carries the sibling's records after the fixed part; the
 * "- 1" presumably discounts a one-byte data[] placeholder in
 * FmapSplit_s -- confirm against its declaration. */
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(3) +
sizeof(FmapSplit_s) - 1 +
(sibling->head.numRecs * sizeof(FmapExt_s)) );
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_SPLIT, xaction, logBuffer,
3, logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], bufChild->volBlk,
child->head.lsn, bufChild, xaction, 0);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[1], bufParent->volBlk,
parent->head.lsn, bufParent, xaction, 1);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[2], bufSibling->volBlk,
sibling->head.lsn, bufSibling, xaction, 2);
ZLOG_ALLOC_BLOCK(logBlks[2]);
memcpy(&logRecord->u.split.data[0], &sibling->extent[0],
sibling->head.numRecs * sizeof(FmapExt_s));
/* The two parent records touched above are the last two. */
memcpy(&logRecord->u.split.parentExt[0],
&parent->extent[parent->head.numRecs - 2], 2 * sizeof(FmapExt_s));
logRecord->u.split.pIndex = parent->head.numRecs - 2;
/* Note: childLink is logged as the sibling block even for a branch
 * child, whose leafLink is not modified above. */
logRecord->u.split.childLink = bufSibling->volBlk;
logRecord->u.split.sibLink = INVALID_BLK_ZERO;
logRecord->u.split.childMagic = child->head.magic;
logRecord->u.split.childState = child->head.state;
logRecord->u.split.numRecs = sibling->head.numRecs;
logRecord->u.split.fs_internalID = sibling->head.fnh_internalID;
logRecord->u.split.fs_zid = sibling->head.fnh_zid;
/* Stamp all three nodes with the LSN of this log record. */
child->head.lsn = logBuffer->ZXR_Lsn;
parent->head.lsn = logBuffer->ZXR_Lsn;
sibling->head.lsn = logBuffer->ZXR_Lsn;
ZLOG_BIND(xaction, bufChild);
ZLOG_BIND(xaction, bufParent);
ZLOG_BIND(xaction, bufSibling);
ZLOG_ReleaseRecord(xaction);
return;
}
/*
 * splitBtreeBranchSparse: split branch node bufChild into bufChild and a
 * new right-hand sibling, and insert the sibling into bufParent directly
 * after the child's record.
 *
 * xaction      transaction to log under.  NOTE(review): unlike
 *              syncSplitBtree there is no NULL check; xaction is used
 *              unconditionally.
 * bufChild     branch node being split.
 * bufSibling   buffer for the new node.
 * bufParent    parent of bufChild.
 * index        record of interest inside bufChild.
 * parentIndex  in:  index of bufChild's record in the parent.
 *              out: index of the record of interest inside the returned
 *                   node.
 *
 * Returns whichever of bufChild / bufSibling holds the record of interest
 * after the split.
 *
 * Two shapes of split:
 *   - The record of interest ends at MAX_FILE_BLK (asserted to be the last
 *     record of the child and of the parent): only that record moves to
 *     the sibling.
 *   - Otherwise the child is cut at numRecs / 2; the record at the cut is
 *     kept as the child's last record and also copied as the sibling's
 *     base record.
 *
 * NOTE(review): sibling->head.lsn is not reset here (syncSplitBtree sets
 * it to 0) before it is passed to ZLOG_ASSIGN_BLOCK_INFO -- confirm the
 * new block's buffer is zeroed or that ZLOG_ALLOC_BLOCK makes the old
 * value irrelevant.
 */
Buffer_s *splitBtreeBranchSparse(
ZfsXaction_s *xaction,
Buffer_s *bufChild,
Buffer_s *bufSibling,
Buffer_s *bufParent,
NINT index,
NINT *parentIndex)
{
FmapNode_s *parent;
FmapNode_s *child;
FmapNode_s *sibling;
NINT pIndex = *parentIndex;
NINT tmpIndex;
Buffer_s *retBuf;
ZfsXasRecovery_s *logBuffer;
BlockInfo_s *logBlks;
FmapLog_s *logRecord;
ASSERT_MPKNSS_LOCK();
child = (FmapNode_s *)bufChild->pBuf.data;
parent = (FmapNode_s *)bufParent->pBuf.data;
sibling = (FmapNode_s *)bufSibling->pBuf.data;
/* The sibling is the same kind of node as the child. */
sibling->head.magic = child->head.magic;
sibling->head.fnh_internalID = child->head.fnh_internalID;
sibling->head.fnh_zid = child->head.fnh_zid;
sibling->head.state = child->head.state;
sibling->head.leafLink = INVALID_BLK_ZERO;
zASSERT(bufSibling->volBlk != 0);
if (child->extent[index].count == MAX_FILE_BLK)
{
zASSERT(index == child->head.numRecs -1);
zASSERT(parent->extent[pIndex].count == MAX_FILE_BLK);
zASSERT(pIndex == parent->head.numRecs -1);
/* Parent: the child's record now ends where record index-1 ends; a new
 * last record routes the remainder to the sibling. */
parent->extent[pIndex].count =
child->extent[index - 1].count;
parent->extent[pIndex + 1].count = MAX_FILE_BLK;
parent->extent[pIndex + 1].poolBlk = bufSibling->volBlk;
parent->head.numRecs++;
/* Sibling record 0: base record, copy of child record index-1. */
sibling->extent[0].count =
child->extent[index - 1].count;
sibling->extent[0].poolBlk =
child->extent[index -1].poolBlk;
sibling->head.numRecs = 1;
/* Sibling record 1: the child's last record, moved over. */
sibling->extent[sibling->head.numRecs].count =
child->extent[index].count;
sibling->extent[sibling->head.numRecs].poolBlk =
child->extent[index].poolBlk;
sibling->head.numRecs++;
child->extent[index].count = 0;
child->extent[index].poolBlk = 0;
child->head.numRecs--;
/* The record of interest is now sibling record 1. */
*parentIndex = sibling->head.numRecs - 1;
retBuf = bufSibling;
}
else
{
tmpIndex = child->head.numRecs >> 1;
/** Copy half the records to the new node **/
memcpy(&sibling->extent[0], &child->extent[tmpIndex],
(sizeof(FmapExt_s) * (child->head.numRecs - tmpIndex)));
sibling->head.numRecs = child->head.numRecs - tmpIndex;
/* The child keeps record tmpIndex as its last record; clear the rest. */
child->head.numRecs = tmpIndex + 1;
bzero(&child->extent[child->head.numRecs],
(sizeof(FmapExt_s) * (FMAP_MAX - child->head.numRecs)));
/* Open a slot after pIndex in the parent for the sibling. */
memmove(&parent->extent[pIndex + 2], &parent->extent[pIndex + 1],
sizeof(FmapExt_s) * (parent->head.numRecs - (pIndex + 1)));
/* The sibling takes over the old end of the range; the child's record
 * now ends at the sibling's base count. */
parent->extent[pIndex + 1].count = parent->extent[pIndex].count;
parent->extent[pIndex + 1].poolBlk = bufSibling->volBlk;
parent->extent[pIndex].count = sibling->extent[0].count;
parent->head.numRecs++;
if (index < child->head.numRecs)
{
*parentIndex = index;
retBuf = bufChild;
}
else
{
/* child->head.numRecs - 1 == tmpIndex, the sibling's record 0. */
*parentIndex = index - (child->head.numRecs - 1);
retBuf = bufSibling;
}
}
/* Log record carries the sibling's records after the fixed part; the
 * "- 1" presumably discounts a one-byte data[] placeholder in
 * FmapSplit_s -- confirm against its declaration. */
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(3) +
sizeof(FmapSplit_s) - 1 +
(sibling->head.numRecs * sizeof(FmapExt_s)));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_SPLIT, xaction, logBuffer,
3, logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], bufChild->volBlk,
child->head.lsn, bufChild, xaction, 0);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[1], bufParent->volBlk,
parent->head.lsn, bufParent, xaction, 1);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[2], bufSibling->volBlk,
sibling->head.lsn, bufSibling, xaction, 2);
ZLOG_ALLOC_BLOCK(logBlks[2]);
memcpy(&logRecord->u.split.data[0], &sibling->extent[0],
sibling->head.numRecs * sizeof(FmapExt_s));
/* The two parent records touched above are pIndex and pIndex + 1. */
memcpy(&logRecord->u.split.parentExt[0],
&parent->extent[pIndex], 2 * sizeof(FmapExt_s));
logRecord->u.split.pIndex = pIndex;
logRecord->u.split.childLink = INVALID_BLK_ZERO;
logRecord->u.split.sibLink = INVALID_BLK_ZERO;
logRecord->u.split.childMagic = child->head.magic;
logRecord->u.split.childState = child->head.state;
logRecord->u.split.numRecs = sibling->head.numRecs;
logRecord->u.split.fs_internalID = sibling->head.fnh_internalID;
logRecord->u.split.fs_zid = sibling->head.fnh_zid;
/* Stamp all three nodes with the LSN of this log record. */
child->head.lsn = logBuffer->ZXR_Lsn;
parent->head.lsn = logBuffer->ZXR_Lsn;
sibling->head.lsn = logBuffer->ZXR_Lsn;
ZLOG_BIND(xaction, bufChild);
ZLOG_BIND(xaction, bufParent);
ZLOG_BIND(xaction, bufSibling);
ZLOG_ReleaseRecord(xaction);
return retBuf;
}
/*
 * updateNodeEntry: place an allocated extent inside the range of record
 * `index` of a leaf node, adding one or two records, and log the change as
 * XFUNC_FMAP_INSERT_SPARSE.
 *
 * Record `index` covers file blocks
 * [extent[index-1].count, extent[index].count); it is presumably a sparse
 * hole (poolBlk == 0) -- the third case below writes 0 as the pool block
 * of both leftover pieces.
 *
 *   1. The extent starts at the start of the range: a new record for it is
 *      inserted at `index`; the old record shifts to index + 1.
 *   2. The extent ends at the end of the range: record `index` is cut to
 *      end at fileBlk and a new record for the extent follows it.
 *   3. Otherwise the extent sits in the middle: the range becomes three
 *      records (hole, extent, hole).
 *
 * xaction  transaction to log under (used unconditionally).
 * buf      buffer holding the leaf node.
 * index    record whose range receives the extent.
 * extent   pool block number and length of the extent.
 * fileBlk  first file block of the extent.
 *
 * No capacity check is made here; the node must have room for up to two
 * more records (updateSparseLeaf checks numRecs against FMAP_MAX before
 * calling).
 */
void updateNodeEntry(
ZfsXaction_s *xaction,
Buffer_s *buf,
NINT index,
Extent_s *extent,
Blknum_t fileBlk)
{
ZfsXasRecovery_s *logBuffer;
BlockInfo_s *logBlks;
FmapLog_s *logRecord;
FmapExt_s origExt[3];
FmapNode_s *node = (FmapNode_s *)buf->pBuf.data;
WORD origNumRecs;
ASSERT_MPKNSS_LOCK();
/* Snapshot records index .. index+2 and the record count for the log. */
memcpy(&origExt[0], &node->extent[index], 3 * sizeof(FmapExt_s));
origNumRecs = node->head.numRecs;
if (fileBlk == node->extent[index -1].count)
{
/* Case 1: extent at the front of the range. */
memmove(&node->extent[index + 1], &node->extent[index],
sizeof(FmapExt_s) * (node->head.numRecs - index));
node->head.numRecs++;
node->extent[index].count = fileBlk + extent->lengthOfExtent;
node->extent[index].poolBlk = extent->poolBlkNum;
}
else if ((fileBlk + extent->lengthOfExtent) == node->extent[index].count)
{
/* Case 2: extent at the back of the range. */
memmove(&node->extent[index + 2], &node->extent[index + 1],
sizeof(FmapExt_s) * (node->head.numRecs - (index + 1)));
node->head.numRecs++;
node->extent[index + 1].count = node->extent[index].count;
node->extent[index + 1].poolBlk = extent->poolBlkNum;
node->extent[index].count = fileBlk;
}
else
{
/* Case 3: extent in the middle; shift the tail by two records. */
memmove(&node->extent[index + 3], &node->extent[index + 1],
sizeof(FmapExt_s) * (node->head.numRecs - (index + 1)));
node->head.numRecs+=2;
node->extent[index + 2].count = node->extent[index].count;
node->extent[index + 2].poolBlk = 0;
node->extent[index + 1].count = fileBlk + extent->lengthOfExtent;
node->extent[index + 1].poolBlk = extent->poolBlkNum;
node->extent[index].count = fileBlk;
node->extent[index].poolBlk = 0;
}
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
sizeof(FmapInsertSparse_s) );
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_INSERT_SPARSE, xaction, logBuffer, 1,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
/* Before/after images of the three records starting at `index`. */
memcpy(&logRecord->u.insertSparse.origExt[0], &origExt[0],
3 * sizeof(FmapExt_s));
memcpy(&logRecord->u.insertSparse.finalExt[0], &node->extent[index],
3 * sizeof(FmapExt_s));
logRecord->u.insertSparse.origNumRecs = origNumRecs;
logRecord->u.insertSparse.finalNumRecs = node->head.numRecs;
/* Logged as index + 1: updateSparseLeaf logs images that start at
 * index - 1 together with `index`, so the logged value is one past the
 * first imaged record in both places. */
logRecord->u.insertSparse.index = index + 1;
node->head.lsn = logBuffer->ZXR_Lsn;
ZLOG_BIND(xaction, buf);
ZLOG_ReleaseRecord(xaction);
}
/*
 * splitBtreeLeafSparse: split leaf node bufChild into bufChild and a new
 * sibling while adding `extent`, and insert the sibling into bufParent.
 *
 * xaction     transaction to log under; may be NULL only in the
 *             allocAhead case (asserted), in which case nothing is logged.
 * bufChild    leaf being split.
 * bufSibling  buffer for the new leaf.
 * bufParent   parent of bufChild.
 * index       record in bufChild whose range receives the extent
 *             (used only when allocAhead == 0).
 * pIndex      index of bufChild's record in the parent; overwritten with
 *             the parent's last index when allocAhead != 0.
 * extent      pool block number and length of the extent being added.
 * allocAhead  non-zero: number of blocks of hole to leave between the end
 *             of the child and the new extent (append case).
 *             zero: the extent goes inside an existing record.
 * fileBlk     first file block of the extent (used only when
 *             allocAhead == 0).
 *
 *   allocAhead != 0: the child keeps all its records.  The sibling gets a
 *     base record (copy of the child's last), a hole record of allocAhead
 *     blocks with poolBlk 0, then the extent.  The parent's last record is
 *     cut at the child's end and a new last record up to MAX_FILE_BLK
 *     points at the sibling.
 *   allocAhead == 0: the child is cut at numRecs / 2 (one later if that
 *     equals `index`); the record at the cut stays as the child's last
 *     record and is also the sibling's base record.  After the split is
 *     logged, updateNodeEntry inserts the extent into whichever node now
 *     holds `index`.
 *
 * NOTE(review): sibling->head.lsn is not reset here (syncSplitBtree sets
 * it to 0) before it is passed to ZLOG_ASSIGN_BLOCK_INFO -- confirm the
 * new block's buffer is zeroed or that ZLOG_ALLOC_BLOCK makes the old
 * value irrelevant.
 */
void splitBtreeLeafSparse(
ZfsXaction_s *xaction,
Buffer_s *bufChild,
Buffer_s *bufSibling,
Buffer_s *bufParent,
NINT index,
NINT pIndex,
Extent_s *extent,
NINT allocAhead,
Blknum_t fileBlk)
{
FmapNode_s *parent;
FmapNode_s *child;
FmapNode_s *sibling;
NINT tmpIndex;
ZfsXasRecovery_s *logBuffer;
BlockInfo_s *logBlks;
FmapLog_s *logRecord;
ASSERT_MPKNSS_LOCK();
child = (FmapNode_s *)bufChild->pBuf.data;
parent = (FmapNode_s *)bufParent->pBuf.data;
sibling = (FmapNode_s *)bufSibling->pBuf.data;
/* The sibling is the same kind of node as the child. */
sibling->head.magic = child->head.magic;
sibling->head.fnh_internalID = child->head.fnh_internalID;
sibling->head.fnh_zid = child->head.fnh_zid;
sibling->head.state = child->head.state;
zASSERT(bufSibling->volBlk != 0);
if (allocAhead)
{
/* Append case: the sibling becomes the new last leaf. */
child->head.leafLink = bufSibling->volBlk;
sibling->head.leafLink = INVALID_BLK_ZERO;
pIndex = parent->head.numRecs - 1;
parent->extent[pIndex].count =
child->extent[child->head.numRecs - 1].count;
parent->extent[pIndex + 1].count = MAX_FILE_BLK;
parent->extent[pIndex + 1].poolBlk = bufSibling->volBlk;
parent->head.numRecs++;
/* Sibling record 0: base record, copy of the child's last record. */
sibling->extent[0].count =
child->extent[child->head.numRecs - 1].count;
sibling->extent[0].poolBlk =
child->extent[child->head.numRecs - 1].poolBlk;
sibling->head.numRecs = 1;
/* Sibling record 1: hole of allocAhead blocks. */
sibling->extent[sibling->head.numRecs].count =
sibling->extent[sibling->head.numRecs - 1].count + allocAhead;
sibling->extent[sibling->head.numRecs].poolBlk = 0;
sibling->head.numRecs++;
/* Sibling record 2: the new extent. */
sibling->extent[sibling->head.numRecs].count =
sibling->extent[sibling->head.numRecs - 1].count +
extent->lengthOfExtent;
sibling->extent[sibling->head.numRecs].poolBlk = extent->poolBlkNum;
sibling->head.numRecs++;
}
else
{
/* Insert case: link the sibling into the leaf chain after the child. */
sibling->head.leafLink = child->head.leafLink;
child->head.leafLink = bufSibling->volBlk;
tmpIndex = child->head.numRecs >> 1;
/* Move the cut one record later when it would land on `index`. */
if (index == tmpIndex)
tmpIndex = tmpIndex + 1;
/** Copy half the records to the new node **/
memcpy(&sibling->extent[0], &child->extent[tmpIndex],
(sizeof(FmapExt_s) * (child->head.numRecs - tmpIndex)));
sibling->head.numRecs = child->head.numRecs - tmpIndex;
/* The child keeps record tmpIndex as its last record; clear the rest. */
child->head.numRecs = tmpIndex + 1;
bzero(&child->extent[child->head.numRecs],
(sizeof(FmapExt_s) * (FMAP_MAX - child->head.numRecs)));
/* Open a slot after pIndex in the parent for the sibling. */
memmove(&parent->extent[pIndex + 2], &parent->extent[pIndex + 1],
sizeof(FmapExt_s) * (parent->head.numRecs - (pIndex + 1)));
parent->head.numRecs++;
parent->extent[pIndex + 1].count = parent->extent[pIndex].count;
parent->extent[pIndex + 1].poolBlk = bufSibling->volBlk;
parent->extent[pIndex].count = sibling->extent[0].count;
}
if (xaction == NULL)
{
/* Unlogged operation is only expected in the append case, where no
 * updateNodeEntry call is needed afterwards. */
zASSERT(allocAhead != 0);
return;
}
/* Log record carries the sibling's records after the fixed part; the
 * "- 1" presumably discounts a one-byte data[] placeholder in
 * FmapSplit_s -- confirm against its declaration. */
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(3) +
sizeof(FmapSplit_s) - 1 +
(sibling->head.numRecs * sizeof(FmapExt_s)) );
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_SPLIT, xaction, logBuffer,
3, logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], bufChild->volBlk,
child->head.lsn, bufChild, xaction, 0);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[1], bufParent->volBlk,
parent->head.lsn, bufParent, xaction, 1);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[2], bufSibling->volBlk,
sibling->head.lsn, bufSibling, xaction, 2);
ZLOG_ALLOC_BLOCK(logBlks[2]);
memcpy(&logRecord->u.split.data[0], &sibling->extent[0],
sibling->head.numRecs * sizeof(FmapExt_s));
/* The two parent records touched above are pIndex and pIndex + 1. */
memcpy(&logRecord->u.split.parentExt[0],
&parent->extent[pIndex], 2 * sizeof(FmapExt_s));
logRecord->u.split.pIndex = pIndex;
logRecord->u.split.childLink = child->head.leafLink;
logRecord->u.split.sibLink = sibling->head.leafLink;
logRecord->u.split.childMagic = child->head.magic;
logRecord->u.split.childState = child->head.state;
logRecord->u.split.numRecs = sibling->head.numRecs;
logRecord->u.split.fs_internalID = sibling->head.fnh_internalID;
logRecord->u.split.fs_zid = sibling->head.fnh_zid;
/* Stamp all three nodes with the LSN of this log record. */
child->head.lsn = logBuffer->ZXR_Lsn;
parent->head.lsn = logBuffer->ZXR_Lsn;
sibling->head.lsn = logBuffer->ZXR_Lsn;
ZLOG_BIND(xaction, bufChild);
ZLOG_BIND(xaction, bufParent);
ZLOG_BIND(xaction, bufSibling);
ZLOG_ReleaseRecord(xaction);
if (!allocAhead)
{
/* Insert the extent, as a separately logged step, into whichever half
 * now holds record `index`. */
if (index < child->head.numRecs)
{
updateNodeEntry(xaction, bufChild, index, extent, fileBlk);
}
else
{
/* child->head.numRecs - 1 == tmpIndex, the sibling's record 0. */
index = index - (child->head.numRecs - 1);
updateNodeEntry(xaction, bufSibling, index, extent, fileBlk);
}
}
return;
}
STATUS updateSparseLeaf(
GeneralMsg_s *genMsg,
ZfsXaction_s *xaction,
ZFSStorageInfo_s *stInfo,
Buffer_s *buf,
Buffer_s *bufParent,
NINT index,
NINT parentIndex,
Blknum_t fileBlk,
Extent_s extent,
Blknum_t allocAhead)
{
Fmap_s *fmap = &stInfo->fmap;
RootBeast_s *beast = stInfo->comnInfo.beast;
FmapNode_s *node = (FmapNode_s *)buf->pBuf.data;
NINT pIndex = parentIndex;
Buffer_s *bufSibling = NULL;
IoMsg_s iomsg;
ZfsXasRecovery_s *logBuffer;
BlockInfo_s *logBlks;
FmapLog_s *logRecord;
NINT tmpFlag = 0;
FmapExt_s origExt[3];
WORD origNumRecs;
#if FMAP_TEST IS_ENABLED
NINT fmapMax;
#endif
ASSERT_MPKNSS_LOCK();
if (allocAhead)
{
#if FMAP_TEST IS_ENABLED
if (FmapTest)
fmapMax = FMAP_MAX_SMALL;
else
fmapMax = FMAP_MAX - 5;
if (node->head.numRecs < (fmapMax - 1))
#else
if (node->head.numRecs < (FMAP_MAX - 6))
#endif
{
node->extent[node->head.numRecs].count = fileBlk;
node->extent[node->head.numRecs].poolBlk = 0;
node->head.numRecs++;
node->extent[node->head.numRecs].count =
fileBlk + extent.lengthOfExtent;
node->extent[node->head.numRecs].poolBlk = extent.poolBlkNum;
node->head.numRecs++;
if (bufParent)
{
CACHE_RELEASE(bufParent);
}
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
sizeof(FmapInsert_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_INSERT, xaction, logBuffer, 1,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
node->head.lsn = logBuffer->ZXR_Lsn;
logRecord->u.insert.numRecs = 2;
logRecord->u.insert.extent.poolBlk = extent.poolBlkNum;
logRecord->u.insert.extent.count = extent.lengthOfExtent;
logRecord->u.insert.fileBlk = fileBlk;
ZLOG_BIND(xaction, buf);
ZLOG_ReleaseRecord(xaction);
CACHE_DIRTY_RELEASE(buf);
}
else
{
XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
if ((bufSibling = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
{
CACHE_RELEASE(buf);
return zFAILURE;
}
if (node->head.state & BT_ROOT)
{
zASSERT(bufParent == NULL);
XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
if ((bufParent = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
{
Extent_s localExt;
CACHE_RELEASE(buf);
localExt.poolBlkNum = bufSibling->volBlk;
localExt.lengthOfExtent = 1;
zfsFreeExtent(genMsg, beast->vol.zfsVol,
&localExt, xaction);
cacheReleaseToss(bufSibling);
return zFAILURE;
}
stInfo->fmapTreeBlks++;
syncGrowBtree(xaction, buf, bufParent, &pIndex, beast);
fmap->root = bufParent->volBlk;
}
zASSERT(bufParent != NULL);
stInfo->fmapTreeBlks++;
splitBtreeLeafSparse(xaction, buf, bufSibling, bufParent,
index, pIndex, &extent, allocAhead, fileBlk);
CACHE_DIRTY_RELEASE(buf);
buf = NULL;
CACHE_DIRTY_RELEASE(bufParent);
bufParent = NULL;
CACHE_DIRTY_RELEASE(bufSibling);
bufSibling = NULL;
}
stInfo->nextBlk = fileBlk + extent.lengthOfExtent;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
}
else
{
memcpy(&origExt[0], &node->extent[index -1], 3 * sizeof(FmapExt_s));
origNumRecs = node->head.numRecs;
if ((index > 1) &&
(fileBlk == node->extent[index - 1].count) &&
((node->extent[index - 1].poolBlk + (node->extent[index -1].count -
node->extent[index - 2].count)) == extent.poolBlkNum))
{
node->extent[index - 1].count += extent.lengthOfExtent;
if (node->extent[index - 1].count == node->extent[index].count)
{
if (node->extent[index + 1].poolBlk ==
(node->extent[index - 1].poolBlk +
(node->extent[index -1].count -
node->extent[index - 2].count)) )
{
node->extent[index-1].count = node->extent[index+1].count;
memmove(&node->extent[index], &node->extent[index + 2],
sizeof(FmapExt_s) * (node->head.numRecs - (index + 2)));
node->head.numRecs-=2;
node->extent[node->head.numRecs].count = 0;
node->extent[node->head.numRecs].poolBlk = 0;
node->extent[node->head.numRecs + 1].count = 0;
node->extent[node->head.numRecs + 1].poolBlk = 0;
}
else
{
memmove(&node->extent[index], &node->extent[index + 1],
sizeof(FmapExt_s) * (node->head.numRecs - (index + 1)));
node->head.numRecs--;
node->extent[node->head.numRecs].count = 0;
node->extent[node->head.numRecs].poolBlk = 0;
}
}
if (bufParent)
{
CACHE_RELEASE(bufParent);
}
}
else if (((fileBlk + extent.lengthOfExtent) ==
node->extent[index].count) &&
((extent.poolBlkNum + extent.lengthOfExtent) ==
node->extent[index + 1].poolBlk))
{
node->extent[index].count = fileBlk;
node->extent[index + 1].poolBlk = extent.poolBlkNum;
if (node->extent[index - 1].count == node->extent[index].count)
{
if ((index > 1) && (node->extent[index + 1].poolBlk ==
(node->extent[index - 1].poolBlk +
(node->extent[index -1].count -
node->extent[index - 2].count)) ))
{
node->extent[index-1].count = node->extent[index+1].count;
memmove(&node->extent[index], &node->extent[index + 2],
sizeof(FmapExt_s) * (node->head.numRecs - (index + 2)));
node->head.numRecs-=2;
node->extent[node->head.numRecs].count = 0;
node->extent[node->head.numRecs].poolBlk = 0;
node->extent[node->head.numRecs + 1].count = 0;
node->extent[node->head.numRecs + 1].poolBlk = 0;
}
else
{
memmove(&node->extent[index], &node->extent[index + 1],
sizeof(FmapExt_s) * (node->head.numRecs - (index + 1)));
node->head.numRecs--;
node->extent[node->head.numRecs].count = 0;
node->extent[node->head.numRecs].poolBlk = 0;
}
}
if (bufParent)
{
CACHE_RELEASE(bufParent);
}
}
else if ((fileBlk == node->extent[index - 1].count) &&
(extent.lengthOfExtent == (node->extent[index].count -
node->extent[index - 1].count)) )
{
node->extent[index].poolBlk = extent.poolBlkNum;
if (bufParent)
{
CACHE_RELEASE(bufParent);
}
}
else
{
tmpFlag = 1;
#if FMAP_TEST IS_ENABLED
if (FmapTest)
fmapMax = FMAP_MAX_SMALL;
else
fmapMax = FMAP_MAX - 5;
if (node->head.numRecs < (fmapMax - 1))
#else
if (node->head.numRecs < (FMAP_MAX - 6))
#endif
{
updateNodeEntry(xaction, buf, index, &extent, fileBlk);
if (bufParent)
{
CACHE_RELEASE(bufParent);
}
CACHE_DIRTY_RELEASE(buf);
}
else
{
XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
if ((bufSibling = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
{
CACHE_RELEASE(buf);
return zFAILURE;
}
if (node->head.state & BT_ROOT)
{
zASSERT(bufParent == NULL);
XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
if ((bufParent = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
{
Extent_s localExt;
CACHE_RELEASE(buf);
localExt.poolBlkNum = bufSibling->volBlk;
localExt.lengthOfExtent = 1;
zfsFreeExtent(genMsg, beast->vol.zfsVol,
&localExt, xaction);
cacheReleaseToss(bufSibling);
return zFAILURE;
}
stInfo->fmapTreeBlks++;
syncGrowBtree(xaction, buf, bufParent, &pIndex, beast);
fmap->root = bufParent->volBlk;
}
zASSERT(bufParent != NULL);
stInfo->fmapTreeBlks++;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
splitBtreeLeafSparse(xaction, buf, bufSibling, bufParent,
index, pIndex, &extent, allocAhead, fileBlk);
CACHE_DIRTY_RELEASE(buf);
buf = NULL;
CACHE_DIRTY_RELEASE(bufParent);
bufParent = NULL;
CACHE_DIRTY_RELEASE(bufSibling);
bufSibling = NULL;
}
}
if (tmpFlag == 0)
{
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
sizeof(FmapInsertSparse_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_INSERT_SPARSE, xaction, logBuffer,
1, logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
memcpy(&logRecord->u.insertSparse.origExt[0], &origExt[0],
3 * sizeof(FmapExt_s));
memcpy(&logRecord->u.insertSparse.finalExt[0],
&node->extent[index -1],
3 * sizeof(FmapExt_s));
logRecord->u.insertSparse.origNumRecs = origNumRecs;
logRecord->u.insertSparse.finalNumRecs = node->head.numRecs;
logRecord->u.insertSparse.index = index;
node->head.lsn = logBuffer->ZXR_Lsn;
ZLOG_BIND(xaction, buf);
ZLOG_ReleaseRecord(xaction);
CACHE_DIRTY_RELEASE(buf);
}
}
return zOK;
}
/*
 * updateSparse
 *	Allocates an extent of 'length' blocks for file block 'fileBlk' of a
 *	sparse file and passes it to updateSparseLeaf() so that it is recorded
 *	in the file-map b-tree.
 *
 *	- If the file map has no b-tree yet (fmap->root == INVALID_BLK_ZERO) a
 *	  new root/leaf block is allocated, seeded with the last direct extent
 *	  and logged with XFUNC_FMAP_INIT_ROOT.
 *	- Otherwise the tree is read from the root and walked down to a leaf.
 *	  A branch node that has reached the fullness threshold is split on
 *	  the way down (growing the tree first when that node is the root).
 *
 *	genMsg      - general message, used for error reporting by the callees
 *	xaction     - transaction that all allocations/log records belong to
 *	stInfo      - storage info of the beast whose file map is updated
 *	fileBlk     - file block the new extent starts at
 *	poolBlk     - used as scratch while descending; on success receives
 *	              the first pool block of the allocated extent
 *	allocAhead  - when non-zero the walk always follows the last record of
 *	              every node; when zero each node is searched for fileBlk
 *	length      - number of blocks requested
 *	flags       - ALLOC_BLOCKS_CONTIGUOUS requests XTREE_CONTIGUOUS_BLKS,
 *	              ALLOC_NO_ZERO_FILL suppresses the zero fill below
 *
 *	Unless ALLOC_NO_ZERO_FILL is set, blocks 1..n-1 of the allocated
 *	extent are zeroed through the cache; block 0 is not touched here.
 *
 *	Returns zOK on success, zFAILURE otherwise.
 */
STATUS updateSparse(
	GeneralMsg_s *genMsg,
	ZfsXaction_s *xaction,
	ZFSStorageInfo_s *stInfo,
	Blknum_t fileBlk,
	Blknum_t *poolBlk,
	Blknum_t allocAhead,
	Blknum_t length,
	BYTE flags)
{
	Fmap_s *fmap = &stInfo->fmap;
	RootBeast_s *beast = stInfo->comnInfo.beast;
	Buffer_s *buf = NULL;
	Buffer_s *bufParent = NULL;
	Buffer_s *bufSibling = NULL;
	Buffer_s *tmpBuf;
	FmapNode_s *node;
	NINT i, index;
	/* Only meaningful when bufParent != NULL, but it is always passed by
	 * value to updateSparseLeaf(), so give it a defined value. */
	NINT indexParent = 0;
	Extent_s extent;
	Blknum_t seed;
	Blknum_t len;
	IoMsg_s iomsg;
	ZfsXasRecovery_s *logBuffer;
	BlockInfo_s *logBlks;
	FmapLog_s *logRecord;
	SQUAD numBytes;
#if FMAP_TEST IS_ENABLED
	NINT fmapMax;
#endif

	ASSERT_MPKNSS_LOCK();
	/* Widen before shifting, exactly like the accounting calls at the end
	 * of this function do, so the quota check sees the full byte count. */
	numBytes = (SQUAD)length << beast->ROOTblkSizeShift;
	if (fmap->root == INVALID_BLK_ZERO)
	{
		/* Making file sparse the first time, filemap is still direct,
		 * continuing entries will be in the btree */
		zASSERT(allocAhead != 0);
		extent.poolBlkNum = 0;
		extent.lengthOfExtent = length;
		index = 0;
		/* Check to make sure we have enough user and directory space available */
		if (VOL_CheckUserSpace(genMsg, beast, numBytes) != zOK ||
			DIRQ_CheckDirQuotas(genMsg, beast, numBytes) != zOK)
		{
			return zFAILURE;
		}
		if (zfsAllocExtent(genMsg, beast->vol.zfsVol, &extent,
				(flags & ALLOC_BLOCKS_CONTIGUOUS) ? XTREE_CONTIGUOUS_BLKS : 0,
				xaction) != zOK)
		{
			return zFAILURE;
		}
		XALLOCBLK_IO_MSG(iomsg, beast, xaction, CACHE_UPDATE);
		if ((buf = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
		{
			/* No block for the new root: give the data extent back. */
			zfsFreeExtent(genMsg, beast->vol.zfsVol, &extent, xaction);
			return zFAILURE;
		}
		if (fmap->numRecs == 0)
			fmap->numRecs++;
		stInfo->fmapTreeBlks++;
		node = (FmapNode_s *)buf->pBuf.data;
		fmap->root = buf->volBlk;
		COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
		node->head.magic = FMAP_BT_ROOT;
		node->head.fnh_internalID = beast->ROOTinternalID;
		node->head.fnh_zid = beast->zid;
		node->head.state = BT_ROOT | BT_LEAF;
		node->head.leafLink = INVALID_BLK_ZERO;
		node->head.lsn = 0;
		/* The new root starts with a copy of the last direct extent. */
		node->extent[0].count = fmap->dirExt[fmap->numRecs -1].count;
		node->extent[0].poolBlk = fmap->dirExt[fmap->numRecs -1].poolBlk;
		node->head.numRecs = 1;
		ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
				sizeof(FmapRoot_s));
		ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_INIT_ROOT, xaction, logBuffer, 1,
				logBlks, logRecord);
		ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
				node->head.lsn, buf, xaction, 0);
		ZLOG_ALLOC_BLOCK(logBlks[0]);
		node->head.lsn = logBuffer->ZXR_Lsn;
		logRecord->u.root.numRecs = 1;
		logRecord->u.root.fr_internalID = node->head.fnh_internalID;
		logRecord->u.root.fr_zid = node->head.fnh_zid;
		memcpy(&logRecord->u.root.extent[0], &node->extent[0],
				1 * sizeof(FmapExt_s));
		ZLOG_BIND(xaction, buf);
		ZLOG_ReleaseRecord(xaction);
		goto ContinueAfterCreatingANewRoot;
	}
	else
	{
		READBLK_IO_MSG(iomsg, beast, fmap->root, CACHE_UPDATE);
		SET_DEBUG_ID(iomsg, 17);
		if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
		{
			return zFAILURE;
		}
		node = (FmapNode_s *)buf->pBuf.data;
		zASSERT(node->head.magic == FMAP_BT_ROOT);
		if (node->head.magic != FMAP_BT_ROOT)
		{
			SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
			ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
			CACHE_RELEASE(buf);
			buf = NULL;
			return zFAILURE;
		}
	}
	/* Walk down to the leaf, keeping the parent of the current node held. */
	while (!(node->head.state & BT_LEAF))
	{
		if (allocAhead)
		{
			*poolBlk = node->extent[node->head.numRecs -1].poolBlk;
			index = node->head.numRecs - 1;
		}
		else
		{
			*poolBlk = searchBranch(node, fileBlk, &index);
		}
#if FMAP_TEST IS_ENABLED
		if (FmapTest)
			fmapMax = FMAP_MAX_SMALL;
		else
			fmapMax = FMAP_MAX - 5;
		if (node->head.numRecs < (fmapMax - 1))
#else
		if (node->head.numRecs < (FMAP_MAX - 6))
#endif
		{
			/* Room left in this branch: it simply becomes the parent. */
			if (bufParent)
			{
				CACHE_RELEASE(bufParent);
			}
			bufParent = buf;
			indexParent = index;
		}
		else
		{
			/* Branch is at the threshold: split it before descending. */
			XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
			if ((bufSibling = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
			{
				CACHE_RELEASE(buf);
				/* A non-root branch still has its parent held; do not
				 * leak it on this error path. */
				if (bufParent)
				{
					CACHE_RELEASE(bufParent);
				}
				return zFAILURE;
			}
			if (node->head.state & BT_ROOT)
			{
				XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
				if ((bufParent = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
				{
					Extent_s localExt;
					CACHE_RELEASE(buf);
					/* Return the sibling block that was just allocated. */
					localExt.poolBlkNum = bufSibling->volBlk;
					localExt.lengthOfExtent = 1;
					zfsFreeExtent(genMsg, beast->vol.zfsVol,
							&localExt, xaction);
					cacheReleaseToss(bufSibling);
					return zFAILURE;
				}
				stInfo->fmapTreeBlks++;
				syncGrowBtree(xaction, buf, bufParent, &indexParent, beast);
				fmap->root = bufParent->volBlk;
			}
			zASSERT(bufParent != NULL);
			stInfo->fmapTreeBlks++;
			COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
			tmpBuf = splitBtreeBranchSparse(xaction, buf, bufSibling,
					bufParent, index, &indexParent);
			/* Keep whichever half the split returned; release the other. */
			(tmpBuf == buf) ? CACHE_DIRTY_RELEASE(bufSibling):
					CACHE_DIRTY_RELEASE(buf);
			CACHE_DIRTY_RELEASE(bufParent);
			bufParent = tmpBuf;
			bufSibling = NULL;
			bufParent->state |= CACHE_DIRTY;
		}
		buf = NULL;
		READBLK_IO_MSG(iomsg, beast, *poolBlk, CACHE_UPDATE);
		SET_DEBUG_ID(iomsg, 18);
		if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
		{
			if (bufParent)
			{
				CACHE_RELEASE(bufParent);
			}
			return zFAILURE;
		}
		node = (FmapNode_s *)buf->pBuf.data;
		zASSERT((node->head.magic == FMAP_BT_LEAF) ||
				(node->head.magic == FMAP_BT_BRANCH));
		if ((node->head.magic != FMAP_BT_LEAF) &&
				(node->head.magic != FMAP_BT_BRANCH))
		{
			SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
			ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
			CACHE_RELEASE(buf);
			buf = NULL;
			if (bufParent)
			{
				CACHE_RELEASE(bufParent);
			}
			return zFAILURE;
		}
	}
	/* At the leaf: pick the allocation seed from the neighbouring extents. */
	if (allocAhead)
	{
		if (node->head.numRecs > 1)
			seed = node->extent[node->head.numRecs -1].poolBlk +
					(fileBlk - node->extent[node->head.numRecs -2].count);
		else
			seed = 0;
		index = node->head.numRecs - 1;
	}
	else
	{
		*poolBlk = searchLeaf(node, fileBlk, &len, &index);
		zASSERT(*poolBlk == 0);
		if (index > 2)
			seed = node->extent[index - 1].poolBlk +
					(fileBlk - node->extent[index -2].count);
		else
			seed = node->extent[index + 1].poolBlk -
					(node->extent[index].count - fileBlk);
	}
#if FMAP_TEST IS_ENABLED
	if (FmapTest || OneBlockExtents)
		extent.poolBlkNum = 0;
	else
		extent.poolBlkNum = seed;
#else
	extent.poolBlkNum = seed;
#endif
	extent.lengthOfExtent = length;
	/* Check to make sure we have enough user/directory space available */
	if (VOL_CheckUserSpace(genMsg, beast, numBytes) != zOK ||
		DIRQ_CheckDirQuotas(genMsg, beast, numBytes) != zOK)
	{
		CACHE_DIRTY_RELEASE(buf);
		if (bufParent)
		{
			CACHE_DIRTY_RELEASE(bufParent);
		}
		return zFAILURE;
	}
	if (zfsAllocExtent( genMsg, beast->vol.zfsVol, &extent,
			(flags & ALLOC_BLOCKS_CONTIGUOUS) ? XTREE_CONTIGUOUS_BLKS : 0,
			xaction) != zOK)
	{
		CACHE_DIRTY_RELEASE(buf);
		if (bufParent)
		{
			CACHE_DIRTY_RELEASE(bufParent);
		}
		return zFAILURE;
	}
ContinueAfterCreatingANewRoot:
	*poolBlk = extent.poolBlkNum;
	/* From here on work with the length that was actually allocated. */
	length = extent.lengthOfExtent;
	if (updateSparseLeaf(genMsg, xaction, stInfo, buf, bufParent, index,
			indexParent, fileBlk, extent, allocAhead) != zOK)
	{
		zfsFreeExtent(genMsg, beast->vol.zfsVol, &extent, xaction);
		return zFAILURE;
	}
	VOL_AdjustUsedUserSpace(&xaction->xaction, beast,
			(SQUAD)extent.lengthOfExtent << beast->ROOTblkSizeShift);
	DIRQ_AdjustUsedDirSpace(&xaction->xaction, beast->ROOTvolume,
			beast, (SQUAD)extent.lengthOfExtent << beast->ROOTblkSizeShift);
	stInfo->fmapDataBlks+= extent.lengthOfExtent;
	COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
	if ((flags & ALLOC_NO_ZERO_FILL) == 0)
	{
		/* Zero every block of the extent except the first one. */
		for(i = 1; i < length; i++)
		{
			buf = cacheAllocBufferForUserData(&beast->ROOTmycache,
					fileBlk+i, (*poolBlk)+i,
					ZFS_BlockSignalHandler, STAT_CACHE_ALLOCATE);
			if (buf->pBuf.data == NULL)
			{
				/* Buffer has no mapped data pointer: map its page
				 * just long enough to clear it. */
				buf->pBuf.data = kmap_atomic(buf->b_page, KM_USER0);
				bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
				kunmap_atomic(buf->pBuf.data, KM_USER0);
				buf->pBuf.data = NULL;
			}
			else
			{
				bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
			}
			CACHE_DIRTY_RELEASE(buf);
		}
	}
	return zOK;
}
/*
 * updateBtreeFileMap
 *	Appends a newly allocated extent to the end of a file map that is
 *	already stored as a b-tree.
 *
 *	The tree is scanned from fmap->root, always following the last record
 *	of each branch. Branch nodes that have reached the fullness threshold
 *	are split on the way down (the tree is grown first when the full node
 *	is the root). At the leaf the extent is allocated, seeded so that it
 *	continues the last extent, and then either
 *	  - merged into the last record when it is physically contiguous,
 *	  - appended as a new record when the leaf has room, or
 *	  - inserted through a leaf split.
 *
 *	genMsg   - general message, used for error reporting by the callees
 *	xaction  - transaction that allocations and log records belong to
 *	stInfo   - storage info of the beast whose file map is updated
 *	fileBlk  - not referenced by this routine
 *	extent   - in: lengthOfExtent requested; out: allocated extent
 *	flags    - ALLOC_BLOCKS_CONTIGUOUS requests XTREE_CONTIGUOUS_BLKS
 *
 *	Returns zOK on success, zFAILURE otherwise.
 */
STATUS updateBtreeFileMap(
	GeneralMsg_s *genMsg,
	ZfsXaction_s *xaction,
	ZFSStorageInfo_s *stInfo,
	Blknum_t fileBlk,
	Extent_s *extent,
	BYTE flags)
{
	RootBeast_s *beast;
	Fmap_s *fmap;
	Blknum_t seed;
	Buffer_s *bufChild = NULL;
	FmapNode_s *child;
	Buffer_s *bufParent = NULL;
	Buffer_s *bufSibling = NULL;
	Blknum_t poolBlk;
	NINT tmp;
	IoMsg_s iomsg;
	ZfsXasRecovery_s *logBuffer;
	BlockInfo_s *logBlks;
	FmapLog_s *logRecord;
	SQUAD numBytes;
#if FMAP_TEST IS_ENABLED
	NINT fmapMax;
#endif

	ASSERT_MPKNSS_LOCK();
	beast = stInfo->comnInfo.beast;
	fmap = &stInfo->fmap;
	poolBlk = fmap->root;
ContinueScanningTheBtree:
	READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_UPDATE);
	SET_DEBUG_ID(iomsg, 19);
	if ((bufChild = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
	{
		if (bufParent)
		{
			CACHE_RELEASE(bufParent);
		}
		if (bufSibling)
		{
			CACHE_RELEASE(bufSibling);
		}
		return zFAILURE;
	}
	child = (FmapNode_s *)(bufChild->pBuf.data);
	zASSERT( (child->head.magic == FMAP_BT_LEAF) ||
			(child->head.magic == FMAP_BT_ROOT) ||
			(child->head.magic == FMAP_BT_BRANCH) );
	if ( (child->head.magic != FMAP_BT_LEAF) &&
			(child->head.magic != FMAP_BT_ROOT) &&
			(child->head.magic != FMAP_BT_BRANCH) )
	{
		SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
		ZLSSPOOL_MediaIsCorrupt(genMsg, bufChild, &iomsg);
		CACHE_RELEASE(bufChild);
		bufChild = NULL;
		if (bufParent)
		{
			CACHE_RELEASE(bufParent);
		}
		if (bufSibling)
		{
			CACHE_RELEASE(bufSibling);
		}
		return zFAILURE;
	}
	if (child->head.state & BT_LEAF)
	{
		/* Seed the allocation right behind the last extent of the leaf. */
		seed = child->extent[child->head.numRecs-1].poolBlk -
				child->extent[child->head.numRecs-2].count +
				child->extent[child->head.numRecs-1].count;
#if FMAP_TEST IS_ENABLED
		if (FmapTest || OneBlockExtents)
			extent->poolBlkNum = 0;
		else
			extent->poolBlkNum = seed;
#else
		extent->poolBlkNum = seed;
#endif
		/* Check to make sure we have enough user/directory space available */
		/* Widen before shifting, as the accounting calls below do. */
		numBytes = (SQUAD)extent->lengthOfExtent << beast->ROOTblkSizeShift;
		if (VOL_CheckUserSpace(genMsg, beast, numBytes) != zOK ||
			DIRQ_CheckDirQuotas(genMsg, beast, numBytes) != zOK)
		{
			if (bufChild)
			{
				CACHE_DIRTY_RELEASE(bufChild);
			}
			if (bufParent)
			{
				CACHE_DIRTY_RELEASE(bufParent);
			}
			if (bufSibling)
			{
				CACHE_DIRTY_RELEASE(bufSibling);
			}
			return zFAILURE;
		}
		if (zfsAllocExtent( genMsg, beast->vol.zfsVol, extent,
				(flags & ALLOC_BLOCKS_CONTIGUOUS) ? XTREE_CONTIGUOUS_BLKS : 0,
				xaction) != zOK)
		{
			if (bufChild)
			{
				CACHE_DIRTY_RELEASE(bufChild);
			}
			if (bufParent)
			{
				CACHE_DIRTY_RELEASE(bufParent);
			}
			if (bufSibling)
			{
				CACHE_DIRTY_RELEASE(bufSibling);
			}
			return zFAILURE;
		}
#if FMAP_TEST IS_ENABLED
		if (FmapTest)
			fmapMax = FMAP_MAX_SMALL;
		else
			fmapMax = FMAP_MAX - 5;
#endif
		if ((child->extent[child->head.numRecs -1].poolBlk +
				child->extent[child->head.numRecs -1].count -
				child->extent[child->head.numRecs -2].count) == extent->poolBlkNum)
		{
			/* New blocks directly follow the last extent: just grow it. */
			child->extent[child->head.numRecs -1].count+=extent->lengthOfExtent;
			ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
					sizeof(FmapInsert_s));
			ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_INSERT, xaction, logBuffer, 1,
					logBlks, logRecord);
			ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], bufChild->volBlk,
					child->head.lsn, bufChild, xaction, 0);
			child->head.lsn = logBuffer->ZXR_Lsn;
			logRecord->u.insert.numRecs = 0;
			logRecord->u.insert.extent.poolBlk = 0;
			logRecord->u.insert.extent.count = extent->lengthOfExtent;
			ZLOG_BIND(xaction, bufChild);
			ZLOG_ReleaseRecord(xaction);
		}
#if FMAP_TEST IS_ENABLED
		else if (child->head.numRecs < (fmapMax - 1))
#else
		else if (child->head.numRecs < (FMAP_MAX - 6))
#endif
		{
			/* Leaf has room: append a new record for the extent. */
			child->extent[child->head.numRecs].poolBlk = extent->poolBlkNum;
			child->extent[child->head.numRecs].count =
					child->extent[child->head.numRecs -1].count+extent->lengthOfExtent;
			child->head.numRecs++;
			ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
					sizeof(FmapInsert_s));
			ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_INSERT, xaction, logBuffer, 1,
					logBlks, logRecord);
			ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], bufChild->volBlk,
					child->head.lsn, bufChild, xaction, 0);
			child->head.lsn = logBuffer->ZXR_Lsn;
			logRecord->u.insert.numRecs = 1;
			logRecord->u.insert.extent.poolBlk =
					child->extent[child->head.numRecs -1].poolBlk;
			logRecord->u.insert.extent.count =
					child->extent[child->head.numRecs -1].count;
			ZLOG_BIND(xaction, bufChild);
			ZLOG_ReleaseRecord(xaction);
		}
		else
		{
			/* Leaf is at the threshold: split it to make room. */
			XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
			if ((bufSibling = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
			{
				zfsFreeExtent(genMsg, beast->vol.zfsVol,
						extent, xaction);
				CACHE_RELEASE(bufChild);
				/* A non-root leaf still has its parent held; release
				 * it the same way the other failure paths of the leaf
				 * case do instead of leaking it. */
				if (bufParent)
				{
					CACHE_DIRTY_RELEASE(bufParent);
				}
				return zFAILURE;
			}
			if (child->head.state & BT_ROOT)
			{
				XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
				if ((bufParent = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
				{
					Extent_s localExt;
					zfsFreeExtent(genMsg, beast->vol.zfsVol,
							extent, xaction);
					CACHE_RELEASE(bufChild);
					/* Return the sibling block that was just allocated. */
					localExt.poolBlkNum = bufSibling->volBlk;
					localExt.lengthOfExtent = 1;
					zfsFreeExtent(genMsg, beast->vol.zfsVol,
							&localExt, xaction);
					cacheReleaseToss(bufSibling);
					return zFAILURE;
				}
				stInfo->fmapTreeBlks++;
				syncGrowBtree(xaction, bufChild, bufParent, &tmp, beast);
				fmap->root = bufParent->volBlk;
			}
			zASSERT(bufParent != NULL);
			stInfo->fmapTreeBlks++;
			syncSplitBtree(xaction, bufChild, bufSibling, bufParent, extent);
		}
		VOL_AdjustUsedUserSpace(&xaction->xaction, beast,
				(SQUAD)extent->lengthOfExtent << beast->ROOTblkSizeShift);
		DIRQ_AdjustUsedDirSpace(&xaction->xaction, beast->ROOTvolume, beast,
				(SQUAD)extent->lengthOfExtent << beast->ROOTblkSizeShift);
		stInfo->fmapDataBlks+= extent->lengthOfExtent;
		COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
	}
	else
	{
		/* Branch node: continue with the child behind its last record. */
		poolBlk = child->extent[child->head.numRecs-1].poolBlk;
#if FMAP_TEST IS_ENABLED
		if (FmapTest)
			fmapMax = FMAP_MAX_SMALL;
		else
			fmapMax = FMAP_MAX - 5;
		if (child->head.numRecs < (fmapMax - 1))
#else
		if (child->head.numRecs < (FMAP_MAX - 6))
#endif
		{
			/* Room left: this branch becomes the parent for the next level. */
			if (bufParent)
			{
				CACHE_DIRTY_RELEASE(bufParent);
				bufParent = NULL;
			}
			bufParent = bufChild;
			bufChild = NULL;
		}
		else
		{
			/* Branch is at the threshold: split it before descending. */
			XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
			if ((bufSibling = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
			{
				CACHE_RELEASE(bufChild);
				/* Do not leak the parent of a non-root branch. */
				if (bufParent)
				{
					CACHE_DIRTY_RELEASE(bufParent);
				}
				return zFAILURE;
			}
			if (child->head.state & BT_ROOT)
			{
				XALLOC_SEED_IO_MSG(iomsg, beast, xaction, 0, CACHE_UPDATE);
				if ((bufParent = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
				{
					Extent_s localExt;
					CACHE_RELEASE(bufChild);
					/* Return the sibling block that was just allocated. */
					localExt.poolBlkNum = bufSibling->volBlk;
					localExt.lengthOfExtent = 1;
					zfsFreeExtent(genMsg, beast->vol.zfsVol,
							&localExt, xaction);
					cacheReleaseToss(bufSibling);
					return zFAILURE;
				}
				stInfo->fmapTreeBlks++;
				syncGrowBtree(xaction, bufChild, bufParent, &tmp, beast);
				fmap->root = bufParent->volBlk;
			}
			zASSERT(bufParent != NULL);
			stInfo->fmapTreeBlks++;
			COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
			syncSplitBtree(xaction, bufChild, bufSibling, bufParent, extent);
			CACHE_DIRTY_RELEASE(bufChild);
			bufChild = NULL;
			CACHE_DIRTY_RELEASE(bufParent);
			bufParent = NULL;
			/* The new sibling is the parent for the next level. */
			bufParent = bufSibling;
			bufSibling = NULL;
		}
		goto ContinueScanningTheBtree;
	}
	CACHE_DIRTY_RELEASE(bufChild);
	if (bufParent)
	{
		CACHE_DIRTY_RELEASE(bufParent);
	}
	if (bufSibling)
	{
		CACHE_DIRTY_RELEASE(bufSibling);
	}
	return zOK;
}
/*
 * updateDirectFileMap
 *	Records an already allocated extent in the direct (in-beast) part of
 *	the file map.
 *
 *	Cases, tried in this order:
 *	  1. the map holds exactly one record: the extent becomes record 1;
 *	  2. the extent starts right behind the last direct extent: the last
 *	     record is lengthened;
 *	  3. fewer than MAX_DIRECT records are in use: a record is appended;
 *	  4. otherwise a b-tree root/leaf block is allocated, filled with the
 *	     last direct extent plus the new one, and logged with
 *	     XFUNC_FMAP_INIT_ROOT.
 *
 *	fileBlk is not referenced by this routine.
 *	Returns zOK, or zFAILURE when the root block cannot be allocated.
 */
STATUS updateDirectFileMap(
	GeneralMsg_s *genMsg,
	ZfsXaction_s *xaction,
	ZFSStorageInfo_s *stInfo,
	Blknum_t fileBlk,
	Extent_s *extent)
{
	RootBeast_s *beast;
	Fmap_s *fmap;
	Buffer_s *buffer = NULL;
	FmapNode_s *node;
	Blknum_t poolBlk = extent->poolBlkNum;
	Blknum_t length = extent->lengthOfExtent;
	IoMsg_s iomsg;
	ZfsXasRecovery_s *logBuffer;
	BlockInfo_s *logBlks;
	FmapLog_s *logRecord;

	ASSERT_MPKNSS_LOCK();
	fmap = &stInfo->fmap;
	beast = stInfo->comnInfo.beast;

	/* Case 1: only record 0 is in use, so the extent goes into record 1. */
	if (fmap->numRecs == 1)
	{
		fmap->dirExt[1].poolBlk = poolBlk;
		fmap->dirExt[1].count = length;
		fmap->numRecs = 2;
		return zOK;
	}

	/* Case 2: physically contiguous with the last extent, so extend it. */
	if ( ( fmap->dirExt[fmap->numRecs -1].poolBlk -
			fmap->dirExt[fmap->numRecs -2].count +
			fmap->dirExt[fmap->numRecs -1].count ) == poolBlk)
	{
		fmap->dirExt[fmap->numRecs - 1].count += length;
		return zOK;
	}

	/* Case 3: a free direct slot is available. */
	if (fmap->numRecs < MAX_DIRECT)
	{
		fmap->dirExt[fmap->numRecs].count =
				fmap->dirExt[fmap->numRecs -1].count + length;
		fmap->dirExt[fmap->numRecs].poolBlk = poolBlk;
		fmap->numRecs++;
		return zOK;
	}

	/* Case 4: direct map is full, so start a b-tree with a root/leaf block. */
	XALLOCBLK_IO_MSG(iomsg, beast, xaction, CACHE_UPDATE);
	buffer = ZFS_AllocPoolBlk(genMsg, &iomsg);
	if (buffer == NULL)
	{
		return zFAILURE;
	}
	stInfo->fmapTreeBlks++;
	fmap->root = buffer->volBlk;
	COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);

	node = (FmapNode_s *)(buffer->pBuf.data);
	node->head.magic = FMAP_BT_ROOT;
	node->head.fnh_internalID = beast->ROOTinternalID;
	node->head.fnh_zid = beast->zid;
	node->head.state = BT_ROOT | BT_LEAF;
	node->head.leafLink = 0;
	node->head.lsn = 0;

	/* Record 0 repeats the last direct extent, record 1 is the new one. */
	node->extent[0].count = fmap->dirExt[fmap->numRecs - 1].count;
	node->extent[0].poolBlk = fmap->dirExt[fmap->numRecs -1].poolBlk;
	node->extent[1].count = fmap->dirExt[fmap->numRecs - 1].count + length;
	node->extent[1].poolBlk = poolBlk;
	node->head.numRecs = 2;

	ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
			sizeof(FmapRoot_s));
	ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_INIT_ROOT, xaction, logBuffer, 1,
			logBlks, logRecord);
	ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buffer->volBlk,
			node->head.lsn, buffer, xaction, 0);
	ZLOG_ALLOC_BLOCK(logBlks[0]);
	node->head.lsn = logBuffer->ZXR_Lsn;
	logRecord->u.root.numRecs = 2;
	logRecord->u.root.fr_internalID = node->head.fnh_internalID;
	logRecord->u.root.fr_zid = node->head.fnh_zid;
	memcpy(&logRecord->u.root.extent[0], &node->extent[0],
			2 * sizeof(FmapExt_s));
	ZLOG_BIND(xaction, buffer);
	ZLOG_ReleaseRecord(xaction);
	CACHE_DIRTY_RELEASE(buffer);
	return zOK;
}
/*
ZLSS_PARENT_BIAS -
If we use the PS_ routines then they need to be volume specific
and optimized... This logic will get better performance if we
have an I/O scheduler that has an AREA cache. If we don't have
an AREA cache I/O scheduler then we would have to rely on the
device's track cache (which is limited in size). The track cache
may help desktops or lightly used servers. See the Physical I/O
specification for more details.
*/
#define ZLSS_PARENT_BIAS
#ifdef ZLSS_PARENT_BIAS
/* One entry of the parent-seed table: maps a parent zid to a seed block. */
typedef struct ParentSeed_s {
Zid_t PS_Zid; /* zid of the parent; this is the lookup key */
Blknum_t PS_Seed; /* seed block number stored for that parent */
} ParentSeed_s;
#endif
#ifdef ZLSS_PARENT_BIAS
BOOL ZLSS_ParentBias = FALSE; /* Allocate 1st block of small file near
other small files from same directory. This should
help read performance, but not writes. Writes should
be helped more by using global seed because it causes
ALL new writes to be near each other. ZLSS_ParentBias
only makes writes from same directory next to each
other. */
Blknum_t ZLSS_ParentBiasSmall = 2; /* Defines how many blocks on the 1st
allocation make a file 'small'. Note that if
a large file is created by doing small writes then
this code assumes that it is a small file:-( */
// TODO(Perf): Is the 1st allocLength always large when allocahead is being used?
/* Parent-seed table used by the PS_ routines. It is searched linearly;
   new entries are inserted at index 0 and the last entry falls off. */
ParentSeed_s VolumeSeeds[128];
#endif
#ifdef ZLSS_PARENT_BIAS
/*
 * PS_FindSeed
 *	Looks up parentZid in VolumeSeeds and returns the seed stored with the
 *	first matching entry, or 0 when no entry matches.
 */
static Blknum_t PS_FindSeed( Zid_t parentZid )
{
	const ParentSeed_s *slot = &VolumeSeeds[0];
	const ParentSeed_s *const pastLast = &VolumeSeeds[0] + NELEMS(VolumeSeeds);

	while ( slot != pastLast )
	{
		if ( slot->PS_Zid == parentZid )
		{
			return slot->PS_Seed;
		}
		++slot;
	}
	return 0;
}
#endif
#ifdef ZLSS_PARENT_BIAS
/*
 * PS_InsertItem
 *	Puts (parentZid, seed) into slot 0 of VolumeSeeds. All existing
 *	entries move up by one slot; the entry in the last slot is dropped.
 */
static void PS_InsertItem( Zid_t parentZid, Blknum_t seed )
{
	/* Size of every entry except the last one, i.e. what gets shifted. */
	const size_t shiftedBytes = sizeof(VolumeSeeds[0]) * (NELEMS(VolumeSeeds) - 1);

	memmove( &VolumeSeeds[1], &VolumeSeeds[0], shiftedBytes );
	VolumeSeeds[0].PS_Seed = seed;
	VolumeSeeds[0].PS_Zid = parentZid;
}
#endif
#ifdef ZLSS_PARENT_BIAS
/*
 * PS_UpdateItem
 *	Stores seed for parentZid: the first VolumeSeeds entry with that zid
 *	is overwritten in place; when there is none a new entry is inserted
 *	with PS_InsertItem().
 */
static void PS_UpdateItem( Zid_t parentZid, Blknum_t seed )
{
	ParentSeed_s *slot = &VolumeSeeds[0];
	ParentSeed_s *const pastLast = &VolumeSeeds[0] + NELEMS(VolumeSeeds);

	for ( ; slot != pastLast; ++slot )
	{
		if ( slot->PS_Zid != parentZid )
		{
			continue;
		}
		slot->PS_Seed = seed;
		return;
	}
	/* Parent not in the table yet. */
	PS_InsertItem(parentZid,seed);
}
#endif
/*
 * extendFileMap
 *	Allocates up to allocLength blocks at the current end of the file
 *	(stInfo->nextBlk) and records them in the file map.
 *
 *	While the map is still direct (fmap->root == INVALID_BLK_ZERO) the
 *	extent is allocated here and stored through updateDirectFileMap();
 *	otherwise updateBtreeFileMap() does both the allocation and the
 *	insertion.
 *
 *	genMsg      - general message, used for error reporting by the callees
 *	xaction     - transaction the allocation belongs to
 *	stInfo      - storage info of the beast being extended
 *	poolBlk     - out: first pool block of the allocated extent
 *	allocLength - number of blocks requested
 *	flags       - ALLOC_BLOCKS_CONTIGUOUS requests XTREE_CONTIGUOUS_BLKS,
 *	              ALLOC_NO_ZERO_FILL suppresses the zero fill below
 *
 *	On success stInfo->nextBlk is advanced by the length of the extent that
 *	was actually allocated. Unless ALLOC_NO_ZERO_FILL is set, blocks
 *	1..n-1 of that extent are zeroed through the cache; block 0 is not
 *	touched here.
 *
 *	Returns zOK on success, zFAILURE otherwise.
 */
STATUS extendFileMap(
	GeneralMsg_s *genMsg,
	ZfsXaction_s *xaction,
	ZFSStorageInfo_s *stInfo,
	Blknum_t *poolBlk,
	NINT allocLength,
	BYTE flags)
{
	Fmap_s *fmap = &stInfo->fmap;
	RootBeast_s *beast = stInfo->comnInfo.beast;
	Blknum_t seed;
	Extent_s extent;
	Blknum_t fileBlk = stInfo->nextBlk;
	Blknum_t allocatedLength;
	Buffer_s *buf;
	NINT i;
	SQUAD numBytes;
	Zid_t parentZid = 0;
	NINT lFlags = 0;

	ASSERT_MPKNSS_LOCK();
	extent.lengthOfExtent = allocLength;
	if ((fileBlk == 0) && (fmap->numRecs == 0))
	{
		fmap->numRecs = 1;
	}
	if (fmap->root == INVALID_BLK_ZERO)
	{
		if (fmap->numRecs == 1)
		{
			/* First allocation of the file: no previous extent to follow. */
			seed = 0;
#ifdef ZLSS_PARENT_BIAS
			if ( ZLSS_ParentBias && (allocLength <= ZLSS_ParentBiasSmall) &&
					COMN_IsDerivedFrom( beast, zFTYPE_NAMED_DATA_STREAM ) )
			{
				NamedBeast_s *nbeast = (NamedBeast_s *)beast;
				/*if ( nbeast->firstParent ) */{ /* Need to look at parent count > 0? */
					parentZid = nbeast->firstParent.p.zid;
					if ( parentZid ) {
						seed = PS_FindSeed( parentZid );
						/* ZLSS_PARENT_BIAS - If we keep this then need new
						flags so that we do not mix metadata area with user
						blocks. Some of the user files may be more than
						one block so they may want to smash into our
						metadata blocks.
						ZLSS_PARENT_BIAS - May want to use allocLength
						to decide where to allocate block(s). */
						if ( seed )
						{
							lFlags = XTREE_AF_NEAR_TREE;
						}
						else
						{
							lFlags = XTREE_AF_NEW_AREA;
						}
					}
				}
			}
#endif
		}
		else
		{
			/* Seed right behind the last direct extent. */
			seed = fmap->dirExt[fmap->numRecs - 1].poolBlk +
					(fmap->dirExt[fmap->numRecs - 1].count -
					fmap->dirExt[fmap->numRecs - 2].count);
		}
#if FMAP_TEST IS_ENABLED
		if (FmapTest || OneBlockExtents)
			extent.poolBlkNum = 0;
		else
			extent.poolBlkNum = seed;
#else
		extent.poolBlkNum = seed;
#endif
		/* Check to make sure we have enough user/directory space available */
		/* Widen before shifting, as the accounting calls below do, so the
		 * byte count is not computed in the width of NINT. */
		numBytes = (SQUAD)allocLength << beast->ROOTblkSizeShift;
		if (VOL_CheckUserSpace(genMsg, beast, numBytes) != zOK ||
			DIRQ_CheckDirQuotas(genMsg, beast, numBytes) != zOK)
		{
			return zFAILURE;
		}
		if (zfsAllocExtent( genMsg, beast->vol.zfsVol, &extent,
				(flags & ALLOC_BLOCKS_CONTIGUOUS) ? XTREE_CONTIGUOUS_BLKS : lFlags,
				xaction) != zOK)
		{
			return zFAILURE;
		}
		if (updateDirectFileMap(genMsg, xaction,
				stInfo, fileBlk, &extent) != zOK)
		{
			zfsFreeExtent(genMsg, beast->vol.zfsVol, &extent, xaction);
			return zFAILURE;
		}
#ifdef ZLSS_PARENT_BIAS
		/* Remember where this parent's file landed for the next lookup. */
		if ( parentZid ) {
			PS_UpdateItem( parentZid, extent.poolBlkNum );
		}
#endif
		VOL_AdjustUsedUserSpace(&xaction->xaction, beast,
				(SQUAD)extent.lengthOfExtent << beast->ROOTblkSizeShift);
		DIRQ_AdjustUsedDirSpace(&xaction->xaction, beast->ROOTvolume, beast,
				(SQUAD)extent.lengthOfExtent << beast->ROOTblkSizeShift);
		stInfo->fmapDataBlks+= extent.lengthOfExtent;
	}
	else
	{
		if (updateBtreeFileMap(genMsg, xaction,
				stInfo, fileBlk, &extent, flags) != zOK)
		{
			return zFAILURE;
		}
	}
	allocatedLength = extent.lengthOfExtent;
	*poolBlk = extent.poolBlkNum;
	stInfo->nextBlk = fileBlk + extent.lengthOfExtent;
	COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
	if ((flags & ALLOC_NO_ZERO_FILL) == 0)
	{
		/* Zero every block of the extent except the first one. */
		for(i = 1; i < allocatedLength; i++)
		{
			buf = cacheAllocBufferForUserData(&beast->ROOTmycache,
					fileBlk+i, (*poolBlk)+i,
					ZFS_BlockSignalHandler, STAT_CACHE_ALLOCATE);
			if (buf->pBuf.data == NULL)
			{
				/* Buffer has no mapped data pointer: map its page
				 * just long enough to clear it. */
				buf->pBuf.data = kmap_atomic(buf->b_page, KM_USER0);
				bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
				kunmap_atomic(buf->pBuf.data, KM_USER0);
				buf->pBuf.data = NULL;
			}
			else
			{
				bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
			}
			CACHE_DIRTY_RELEASE(buf);
		}
	}
	return zOK;
}
Buffer_s *ZFSVOL_VOL_getFileBlk(
GeneralMsg_s *genMsg,
IoMsg_s *io)
{
RootBeast_s *beast = io->beast;
ZFSStorageInfo_s *stInfo = beast->storage.zfsInfo;
Blknum_t poolBlk;
Blknum_t tmpBlk;
Blknum_t fileBlk = io->fileBlk;
Blknum_t allocAhead;
Blknum_t allocRemain;
Buffer_s *buf;
ZfsXaction_s *xaction;
ASSERT_MPKNSS_LOCK();
if (io->mode == CACHE_READ)
{
/** Does not change the file map **/
ASSERT_LATCH(&beast->ROOTbeastLatch);
if (fileBlk >= stInfo->nextBlk)
{
ASSERT_SLATCH( &CACHE_SparseBuffer.agent.latch);
ADD_LATCH( &CACHE_SparseBuffer.agent.latch);
return &CACHE_SparseBuffer;
}
if (findBlkInFileMap(genMsg, stInfo, fileBlk, &poolBlk) != zOK)
{
return NULL;
}
if (poolBlk == 0)
{
ASSERT_SLATCH( &CACHE_SparseBuffer.agent.latch);
ADD_LATCH( &CACHE_SparseBuffer.agent.latch);
return &CACHE_SparseBuffer;
}
io->volBlk = poolBlk;
SET_DEBUG_ID(*io, 20);
return ZFS_ReadPoolBlk(genMsg, io);
}
else if (fileBlk >= stInfo->nextBlk)
{
ASSERT_XLATCH(&beast->ROOTbeastLatch);
zASSERT(io->allocNumBlks >= 1);
zASSERT(!(beast->ROOTvolume->VOLenabledAttributes & zATTR_READONLY));
if ((io->allocNumBlks > 1) &&
(io->flags & ALLOC_NUM_BLOCKS_IS_OPTIONAL))
{
beast->bstState |= BST_STATE_TRUNCATE_CLOSE;
}
xaction = BeginXLocal(beast->vol.volume,
BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
allocAhead = fileBlk - stInfo->nextBlk;
if (allocAhead > ALLOC_AHEAD_SPARSE)
{
allocRemain = io->allocNumBlks;
tmpBlk = fileBlk;
if (updateSparse(genMsg, xaction, stInfo, fileBlk, &poolBlk,
allocAhead, allocRemain, io->flags) != zOK)
{
COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction);
EndXlocal(xaction);
return NULL;
}
zASSERT(stInfo->nextBlk > tmpBlk);
io->volBlk = poolBlk;
allocRemain -= (stInfo->nextBlk - tmpBlk);
while (allocRemain)
{
if ((stInfo->nextBlk > fileBlk) &&
(io->flags & (ALLOC_NUM_BLOCKS_IS_OPTIONAL |
ALLOC_BLOCKS_CONTIGUOUS)))
{
break;
}
tmpBlk = stInfo->nextBlk;
if (extendFileMap(genMsg, xaction, stInfo, &poolBlk,
allocRemain, io->flags) != zOK)
{
COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction);
EndXlocal(xaction);
return NULL;
}
zASSERT(stInfo->nextBlk > tmpBlk);
allocRemain -= (stInfo->nextBlk - tmpBlk);
if ((io->flags & ALLOC_NO_ZERO_FILL) == 0)
{
buf = cacheAllocBufferForUserData(&beast->ROOTmycache,
tmpBlk, poolBlk,
ZFS_BlockSignalHandler, STAT_CACHE_ALLOCATE);
if (buf->pBuf.data == NULL)
{
buf->pBuf.data = kmap_atomic(buf->b_page, KM_USER0);
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
kunmap_atomic(buf->pBuf.data, KM_USER0);
buf->pBuf.data = NULL;
}
else
{
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
}
CACHE_DIRTY_RELEASE(buf);
}
}
}
else
{
while (allocAhead)
{
tmpBlk = stInfo->nextBlk;
if (extendFileMap(genMsg, xaction, stInfo, &poolBlk,
allocAhead, io->flags) != zOK)
{
COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction);
EndXlocal(xaction);
return NULL;
}
zASSERT(stInfo->nextBlk > tmpBlk);
allocAhead -= (stInfo->nextBlk - tmpBlk);
if ((io->flags & ALLOC_NO_ZERO_FILL) == 0)
{
buf = cacheAllocBufferForUserData(&beast->ROOTmycache,
tmpBlk, poolBlk,
ZFS_BlockSignalHandler, STAT_CACHE_ALLOCATE);
if (buf->pBuf.data == NULL)
{
buf->pBuf.data = kmap_atomic(buf->b_page, KM_USER0);
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
kunmap_atomic(buf->pBuf.data, KM_USER0);
buf->pBuf.data = NULL;
}
else
{
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
}
CACHE_DIRTY_RELEASE(buf);
}
}
/** Write the requested fileblk **/
zASSERT(fileBlk == stInfo->nextBlk);
tmpBlk = stInfo->nextBlk;
allocRemain = io->allocNumBlks;
if (extendFileMap(genMsg, xaction, stInfo, &poolBlk,
allocRemain, io->flags) != zOK)
{
COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction);
EndXlocal(xaction);
return NULL;
}
zASSERT(stInfo->nextBlk > tmpBlk);
io->volBlk = poolBlk;
allocRemain -= (stInfo->nextBlk - tmpBlk);
while (allocRemain)
{
if ((stInfo->nextBlk > fileBlk) &&
(io->flags & (ALLOC_NUM_BLOCKS_IS_OPTIONAL |
ALLOC_BLOCKS_CONTIGUOUS)))
{
break;
}
tmpBlk = stInfo->nextBlk;
if (extendFileMap(genMsg, xaction, stInfo, &poolBlk,
allocRemain, io->flags) != zOK)
{
COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction);
EndXlocal(xaction);
return NULL;
}
zASSERT(stInfo->nextBlk > tmpBlk);
allocRemain -= (stInfo->nextBlk - tmpBlk);
if ((io->flags & ALLOC_NO_ZERO_FILL) == 0)
{
buf = cacheAllocBufferForUserData(&beast->ROOTmycache,
tmpBlk, poolBlk,
ZFS_BlockSignalHandler, STAT_CACHE_ALLOCATE);
if (buf->pBuf.data == NULL)
{
buf->pBuf.data = kmap_atomic(buf->b_page, KM_USER0);
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
kunmap_atomic(buf->pBuf.data, KM_USER0);
buf->pBuf.data = NULL;
}
else
{
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
}
CACHE_DIRTY_RELEASE(buf);
}
}
}
if (COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction) != zOK)
{
EndXlocal(xaction);
return NULL;
}
if (io->flags & ALLOC_BLOCKS_CONTIGUOUS)
{
if ((io->fileBlk + io->allocNumBlks) != stInfo->nextBlk)
{
/* This is the case where we have only partially allocated
* the requested contiguous blocks
*/
if ((io->flags & ALLOC_NO_ZERO_FILL) == 0)
{
buf = cacheAllocBufferForUserData(&beast->ROOTmycache,
io->fileBlk, io->volBlk,
ZFS_BlockSignalHandler, STAT_CACHE_ALLOCATE);
if (buf->pBuf.data == NULL)
{
buf->pBuf.data = kmap_atomic(buf->b_page, KM_USER0);
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
kunmap_atomic(buf->pBuf.data, KM_USER0);
buf->pBuf.data = NULL;
}
else
{
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
}
CACHE_DIRTY_RELEASE(buf);
}
SetErrno(genMsg, zERR_CONTIGUOUS_SPACE);
EndXlocal(xaction);
return NULL;
}
}
buf = cacheAllocBufferForUserData(&beast->ROOTmycache,
io->fileBlk, io->volBlk,
ZFS_BlockSignalHandler, STAT_CACHE_ALLOCATE);
if ((io->mode != CACHE_WRITE) &&
((io->flags & ALLOC_NO_ZERO_FILL) == 0))
{
if (buf->pBuf.data == NULL)
{
buf->pBuf.data = kmap_atomic(buf->b_page, KM_USER0);
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
kunmap_atomic(buf->pBuf.data, KM_USER0);
buf->pBuf.data = NULL;
}
else
{
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
}
}
EndXlocal(xaction);
return buf;
}
else
{
ASSERT_XLATCH(&beast->ROOTbeastLatch);
zASSERT(!(beast->ROOTvolume->VOLenabledAttributes & zATTR_READONLY));
if (findBlkInFileMap(genMsg, stInfo, io->fileBlk, &poolBlk) != zOK)
{
return NULL;
}
if (poolBlk != 0)
{
io->volBlk = poolBlk;
SET_DEBUG_ID(*io, 21);
return ZFS_ReadPoolBlk(genMsg, io);
}
else
{
xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
if (updateSparse(genMsg, xaction, stInfo, fileBlk, &poolBlk,
0, 1, io->flags) != zOK)
{
COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction);
EndXlocal(xaction);
return NULL;
}
io->volBlk = poolBlk;
buf = cacheAllocBufferForUserData(&beast->ROOTmycache,
io->fileBlk, io->volBlk,
ZFS_BlockSignalHandler, STAT_CACHE_ALLOCATE);
if ((io->mode != CACHE_WRITE) &&
((io->flags & ALLOC_NO_ZERO_FILL) == 0))
{
if (buf->pBuf.data == NULL)
{
buf->pBuf.data = kmap_atomic(buf->b_page, KM_USER0);
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
kunmap_atomic(buf->pBuf.data, KM_USER0);
buf->pBuf.data = NULL;
}
else
{
bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
}
}
if (COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction) != zOK)
{
EndXlocal(xaction);
CACHE_RELEASE(buf);
return NULL;
}
EndXlocal(xaction);
return buf;
}
}
}
/*************************************************************************
* Recovery routines
*************************************************************************/
/*
 * redoInitRoot
 *	Recovery (redo) handler for a file-map root initialisation record.
 *	When block 0 of the record is valid, the block is read, cleared and
 *	rebuilt as a root/leaf node from the ids, record count and extents
 *	stored in the log record; its LSN is set through ZLOG_SET_LSN and the
 *	buffer is released dirty.
 *
 *	Returns zFAILURE when the block cannot be read, zOK otherwise.
 */
STATUS redoInitRoot(
	GeneralMsg_s *genMsg,
	ZfsPool_s *pool,
	ZfsXasRecovery_s *logBuffer,
	NINT pass)
{
	FmapLog_s *logRecord;
	BlockInfo_s *logBlks;
	Buffer_s *buffer;
	FmapNode_s *node;
	IoMsg_s iomsg;

	ASSERT_MPKNSS_LOCK();
	logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
	logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

	/* Nothing to redo when the logged block is not valid. */
	if (!(ZLOG_VALID_BLOCK(logBlks[0])))
	{
		return zOK;
	}

	READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
	SET_DEBUG_ID(iomsg, 0);
	buffer = ZFS_ReadPoolBlk(genMsg, &iomsg);
	if (buffer == NULL)
	{
		return zFAILURE;
	}

	/* Start from an all-zero block, then rebuild the node header. */
	bzero(buffer->pBuf.data, (1 << buffer->bufSizeShift));
	node = (FmapNode_s *)buffer->pBuf.data;
	node->head.magic = FMAP_BT_ROOT;
	node->head.state = BT_ROOT | BT_LEAF;
	node->head.leafLink = 0;
	node->head.fnh_internalID = logRecord->u.root.fr_internalID;
	node->head.fnh_zid = logRecord->u.root.fr_zid;
	node->head.numRecs = logRecord->u.root.numRecs;

	/* Restore the logged extents. */
	memcpy(&node->extent[0], &logRecord->u.root.extent[0],
			node->head.numRecs * sizeof(FmapExt_s));
	ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
	CACHE_DIRTY_RELEASE(buffer);
	return zOK;
}
/*
 * undoInitRoot
 *
 * Recovery "undo" handler for the file-map root initialization record.
 *
 * If the first logged pool block is valid, the block is obtained in
 * CACHE_WRITE mode and handed to cacheReleaseToss().  A failure to obtain
 * the buffer is ignored.
 *
 * Always returns zOK.
 */
STATUS undoInitRoot(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    BlockInfo_s *logBlks;
    Buffer_s *tossBuf;
    IoMsg_s iomsg;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);

    if (!(ZLOG_VALID_BLOCK(logBlks[0])))
    {
        return zOK;
    }

    READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_WRITE)
    SET_DEBUG_ID(iomsg, 0);
    tossBuf = ZFS_ReadPoolBlk(genMsg, &iomsg);
    if (tossBuf != NULL)
    {
        cacheReleaseToss(tossBuf);
    }
    return zOK;
}
/*
 * redoFmapInsert
 *
 * Recovery "redo" handler for an insert at the end of a file-map node.
 *
 * The logged block is read for update and its magic is validated.  Unless
 * ZLOG_ALREADY_DONE reports the change as already applied, the insert is
 * replayed according to the logged record count:
 *   0 - the last extent's count is increased by the logged count;
 *   1 - one extent (poolBlk, count) is appended;
 *   2 - two extents are appended: one with poolBlk 0 and count = fileBlk,
 *       followed by the logged extent with count = fileBlk + logged count.
 * Any other value trips zASSERT(0).  The node LSN is then stamped and the
 * buffer released dirty.
 *
 * Returns zOK on success or when nothing had to be done, zFAILURE when the
 * block cannot be read or its magic is not a file-map magic (errno is set
 * to zERR_MEDIA_CORRUPTED in that case).
 */
STATUS redoFmapInsert(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    FmapLog_s *logRecord;
    BlockInfo_s *logBlks;
    Buffer_s *buf;
    FmapNode_s *node;
    IoMsg_s iomsg;
    WORD last;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
    logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

    if (!(ZLOG_VALID_BLOCK(logBlks[0])))
    {
        return zOK;
    }

    READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
    SET_DEBUG_ID(iomsg, 0);
    buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
    if (buf == NULL)
    {
        return zFAILURE;
    }

    node = (FmapNode_s *)buf->pBuf.data;
    zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
            (node->head.magic == FMAP_BT_LEAF) ||
            (node->head.magic == FMAP_BT_ROOT));
    if (!((node->head.magic == FMAP_BT_BRANCH) ||
          (node->head.magic == FMAP_BT_LEAF) ||
          (node->head.magic == FMAP_BT_ROOT)))
    {
        /* Not a file-map node: report corruption and give up. */
        SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
        ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
        CACHE_RELEASE(buf);
        return zFAILURE;
    }

    if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
    {
        CACHE_RELEASE(buf);
        return zOK;
    }

    /* Index of the last record before the insert is replayed. */
    last = node->head.numRecs - 1;
    switch (logRecord->u.insert.numRecs)
    {
    case 0:
        node->extent[last].count += logRecord->u.insert.extent.count;
        break;

    case 1:
        node->head.numRecs++;
        node->extent[last + 1].poolBlk = logRecord->u.insert.extent.poolBlk;
        node->extent[last + 1].count = logRecord->u.insert.extent.count;
        break;

    case 2:
        node->head.numRecs += 2;
        node->extent[last + 1].poolBlk = 0;
        node->extent[last + 1].count = logRecord->u.insert.fileBlk;
        node->extent[last + 2].poolBlk = logRecord->u.insert.extent.poolBlk;
        node->extent[last + 2].count = logRecord->u.insert.fileBlk +
                                       logRecord->u.insert.extent.count;
        break;

    default:
        zASSERT(0);
        break;
    }

    ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
    CACHE_DIRTY_RELEASE(buf);
    return zOK;
}
/*
 * undoFmapInsert
 *
 * Recovery "undo" handler for an insert at the end of a file-map node.
 *
 * The logged block is read for update and its magic is validated.  Unless
 * ZLOG_ALREADY_DONE reports the work as done, the insert is reversed
 * according to the logged record count:
 *   0 - the logged count is subtracted from the last extent;
 *   1 - the last extent is cleared and the record count drops by one;
 *   2 - the last two extents are cleared and the record count drops by two.
 * Other values leave the extents untouched.  The node LSN is then stamped
 * and the buffer released dirty.
 *
 * Returns zOK on success or when nothing had to be done, zFAILURE when the
 * block cannot be read or its magic is not a file-map magic (errno is set
 * to zERR_MEDIA_CORRUPTED in that case).
 */
STATUS undoFmapInsert(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    FmapLog_s *logRecord;
    BlockInfo_s *logBlks;
    Buffer_s *buf;
    FmapNode_s *node;
    IoMsg_s iomsg;
    WORD last;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
    logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

    if (!(ZLOG_VALID_BLOCK(logBlks[0])))
    {
        return zOK;
    }

    READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
    SET_DEBUG_ID(iomsg, 0);
    buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
    if (buf == NULL)
    {
        return zFAILURE;
    }

    node = (FmapNode_s *)buf->pBuf.data;
    zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
            (node->head.magic == FMAP_BT_LEAF) ||
            (node->head.magic == FMAP_BT_ROOT));
    if (!((node->head.magic == FMAP_BT_BRANCH) ||
          (node->head.magic == FMAP_BT_LEAF) ||
          (node->head.magic == FMAP_BT_ROOT)))
    {
        /* Not a file-map node: report corruption and give up. */
        SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
        ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
        CACHE_RELEASE(buf);
        return zFAILURE;
    }

    if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
    {
        CACHE_RELEASE(buf);
        return zOK;
    }

    /* Index of the last record as it stands after the insert. */
    last = node->head.numRecs - 1;
    switch (logRecord->u.insert.numRecs)
    {
    case 0:
        node->extent[last].count -= logRecord->u.insert.extent.count;
        break;

    case 1:
        node->head.numRecs--;
        node->extent[last].poolBlk = 0;
        node->extent[last].count = 0;
        break;

    case 2:
        node->head.numRecs -= 2;
        node->extent[last].poolBlk = 0;
        node->extent[last].count = 0;
        node->extent[last - 1].poolBlk = 0;
        node->extent[last - 1].count = 0;
        break;

    default:
        /* No extent change for any other logged record count. */
        break;
    }

    ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
    CACHE_DIRTY_RELEASE(buf);
    return zOK;
}
/*
 * redoFmapInsertSparse
 *
 * Recovery "redo" handler for a sparse insert into a file-map node.
 *
 * The logged block is read for update and its magic validated.  Unless
 * ZLOG_ALREADY_DONE reports the change as applied, the extent array is
 * reshaped from origNumRecs to finalNumRecs records around the logged
 * index, three extents from the record's finalExt[] are copied in at that
 * index, and the node's record count is set to finalNumRecs.
 *
 * Returns zOK on success or when nothing had to be done, zFAILURE when the
 * block cannot be read or its magic is not a file-map magic.
 */
STATUS redoFmapInsertSparse(
GeneralMsg_s *genMsg,
ZfsPool_s *pool,
ZfsXasRecovery_s *logBuffer,
NINT pass)
{
FmapLog_s *logRecord;
BlockInfo_s *logBlks;
Buffer_s *buffer;
FmapNode_s *node;
IoMsg_s iomsg;
WORD origNumRecs;
WORD finalNumRecs;
WORD diff;
WORD index;
ASSERT_MPKNSS_LOCK();
logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);
if (ZLOG_VALID_BLOCK(logBlks[0]))
{
READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
SET_DEBUG_ID(iomsg, 0);
if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
return zFAILURE;
}
node = (FmapNode_s *)buffer->pBuf.data;
zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
(node->head.magic == FMAP_BT_LEAF) ||
(node->head.magic == FMAP_BT_ROOT));
/* Same check as the assert, enforced at run time as well. */
if ((node->head.magic != FMAP_BT_BRANCH) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_ROOT))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buffer, &iomsg);
CACHE_RELEASE(buffer);
buffer = NULL;
return zFAILURE;
}
if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
{
CACHE_RELEASE(buffer);
}
else
{
origNumRecs = logRecord->u.insertSparse.origNumRecs;
finalNumRecs = logRecord->u.insertSparse.finalNumRecs;
/* The working index is one less than the logged index. */
index = logRecord->u.insertSparse.index - 1;
if (origNumRecs > finalNumRecs)
{
/* Node shrinks: slide the tail left by diff records, then zero
 * the diff slots left over at the end of the old record range. */
diff = origNumRecs - finalNumRecs;
memmove(&node->extent[index], &node->extent[index + diff],
sizeof(FmapExt_s) * (node->head.numRecs - (index + diff)));
while(diff)
{
node->extent[node->head.numRecs - diff].count = 0;
node->extent[node->head.numRecs - diff].poolBlk = 0;
diff--;
}
}
else if (finalNumRecs > origNumRecs)
{
/* Node grows: slide everything from index onward right by diff. */
diff = finalNumRecs - origNumRecs;
memmove(&node->extent[index + diff], &node->extent[index],
sizeof(FmapExt_s) * (node->head.numRecs - index));
}
/* Three extents are always copied from finalExt[], whatever the counts. */
memcpy(&node->extent[index], &logRecord->u.insertSparse.finalExt[0],
3 * sizeof(FmapExt_s));
node->head.numRecs = finalNumRecs;
ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
CACHE_DIRTY_RELEASE(buffer);
}
}
return zOK;
}
/*
 * undoFmapInsertSparse
 *
 * Recovery "undo" handler for a sparse insert into a file-map node.
 *
 * The logged block is read for update and its magic validated.  Unless
 * ZLOG_ALREADY_DONE reports the work as done, the extent array is reshaped
 * from finalNumRecs back to origNumRecs records around the logged index,
 * three extents from the record's origExt[] are copied in at that index,
 * and the node's record count is set to origNumRecs.
 *
 * Returns zOK on success or when nothing had to be done, zFAILURE when the
 * block cannot be read or its magic is not a file-map magic.
 */
STATUS undoFmapInsertSparse(
GeneralMsg_s *genMsg,
ZfsPool_s *pool,
ZfsXasRecovery_s *logBuffer,
NINT pass)
{
FmapLog_s *logRecord;
BlockInfo_s *logBlks;
Buffer_s *buffer;
FmapNode_s *node;
IoMsg_s iomsg;
WORD index;
WORD origNumRecs;
WORD finalNumRecs;
WORD diff;
ASSERT_MPKNSS_LOCK();
logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);
if (ZLOG_VALID_BLOCK(logBlks[0]))
{
READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
SET_DEBUG_ID(iomsg, 0);
if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
return zFAILURE;
}
node = (FmapNode_s *)buffer->pBuf.data;
zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
(node->head.magic == FMAP_BT_LEAF) ||
(node->head.magic == FMAP_BT_ROOT));
/* Same check as the assert, enforced at run time as well. */
if ((node->head.magic != FMAP_BT_BRANCH) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_ROOT))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buffer, &iomsg);
CACHE_RELEASE(buffer);
buffer = NULL;
return zFAILURE;
}
if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
{
CACHE_RELEASE(buffer);
}
else
{
origNumRecs = logRecord->u.insertSparse.origNumRecs;
finalNumRecs = logRecord->u.insertSparse.finalNumRecs;
/* The working index is one less than the logged index. */
index = logRecord->u.insertSparse.index - 1;
if (origNumRecs > finalNumRecs)
{
/* The insert shrank the node: reopen the gap by sliding the tail
 * right by diff records. */
diff = origNumRecs - finalNumRecs;
memmove(&node->extent[index + diff], &node->extent[index],
sizeof(FmapExt_s) * (node->head.numRecs - index));
}
else if (finalNumRecs > origNumRecs)
{
/* The insert grew the node: slide the tail left by diff records,
 * then zero the diff slots left over at the end. */
diff = finalNumRecs - origNumRecs;
memmove(&node->extent[index], &node->extent[index + diff],
sizeof(FmapExt_s) * (node->head.numRecs - (index + diff)));
while(diff)
{
node->extent[node->head.numRecs - diff].count = 0;
node->extent[node->head.numRecs - diff].poolBlk = 0;
diff--;
}
}
/* Three extents are always copied from origExt[], whatever the counts. */
memcpy(&node->extent[index], &logRecord->u.insertSparse.origExt[0],
3 * sizeof(FmapExt_s));
node->head.numRecs = origNumRecs;
ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
CACHE_DIRTY_RELEASE(buffer);
}
}
return zOK;
}
/*
 * redoFmapGrow
 *
 * Recovery "redo" handler for growing the file-map tree by one level.
 *
 * Block 0 (the former root, now a child): read for update, validated, and,
 * unless ZLOG_ALREADY_DONE says otherwise, stripped of BT_ROOT.  Its magic
 * becomes FMAP_BT_LEAF or FMAP_BT_BRANCH depending on BT_LEAF in its
 * state, and its IDs are refreshed from the log record.
 *
 * Block 1 (the new root): read for update, cleared and rebuilt as a root
 * with two records whose contents are copied from the log record.
 *
 * Returns zOK on success, zFAILURE when a block cannot be read or block 0
 * does not carry a file-map magic.
 */
STATUS redoFmapGrow(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    FmapLog_s *logRecord;
    BlockInfo_s *logBlks;
    Buffer_s *buf;
    FmapNode_s *node;
    IoMsg_s iomsg;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
    logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

    /** demote the old root to an ordinary child **/
    if (ZLOG_VALID_BLOCK(logBlks[0]))
    {
        READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
        SET_DEBUG_ID(iomsg, 0);
        buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
        if (buf == NULL)
        {
            return zFAILURE;
        }
        node = (FmapNode_s *)buf->pBuf.data;
        zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
                (node->head.magic == FMAP_BT_LEAF) ||
                (node->head.magic == FMAP_BT_ROOT));
        if (!((node->head.magic == FMAP_BT_BRANCH) ||
              (node->head.magic == FMAP_BT_LEAF) ||
              (node->head.magic == FMAP_BT_ROOT)))
        {
            SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
            ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
            CACHE_RELEASE(buf);
            return zFAILURE;
        }
        if (!(ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass)))
        {
            node->head.state &= ~BT_ROOT;
            node->head.magic = (node->head.state & BT_LEAF)
                                   ? FMAP_BT_LEAF
                                   : FMAP_BT_BRANCH;
            node->head.fnh_internalID = logRecord->u.grow.fg_internalID;
            node->head.fnh_zid = logRecord->u.grow.fg_zid;
            ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
            CACHE_DIRTY_RELEASE(buf);
        }
        else
        {
            CACHE_RELEASE(buf);
        }
    }

    /** build the new root from scratch **/
    if (ZLOG_VALID_BLOCK(logBlks[1]))
    {
        READBLK_IO_MSG(iomsg, pool, logBlks[1].blkNum, CACHE_UPDATE)
        SET_DEBUG_ID(iomsg, 0);
        buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
        if (buf == NULL)
        {
            return zFAILURE;
        }
        bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
        node = (FmapNode_s *)buf->pBuf.data;
        node->head.magic = FMAP_BT_ROOT;
        node->head.state = BT_ROOT;
        node->head.numRecs = 2;
        node->head.leafLink = INVALID_BLK_ZERO;
        node->head.fnh_internalID = logRecord->u.grow.fg_internalID;
        node->head.fnh_zid = logRecord->u.grow.fg_zid;
        memcpy(&node->extent[0], &logRecord->u.grow.extent[0],
               sizeof(FmapGrow_s));
        ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[1], pass);
        CACHE_DIRTY_RELEASE(buf);
    }
    return zOK;
}
/*
 * undoFmapGrow
 *
 * Recovery "undo" handler for growing the file-map tree by one level.
 *
 * Block 0 (the node that had been demoted): read for update, validated,
 * and, unless ZLOG_ALREADY_DONE says otherwise, marked as root again
 * (BT_ROOT set, magic FMAP_BT_ROOT) with its IDs refreshed from the log
 * record.
 *
 * Block 1 (the root that had been created): obtained in CACHE_WRITE mode
 * and handed to cacheReleaseToss(); a failure to obtain it is ignored.
 *
 * Returns zOK on success, zFAILURE when block 0 cannot be read or does not
 * carry a file-map magic.
 */
STATUS undoFmapGrow(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    FmapLog_s *logRecord;
    BlockInfo_s *logBlks;
    Buffer_s *buf;
    FmapNode_s *node;
    IoMsg_s iomsg;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
    logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

    /** promote the child back to root **/
    if (ZLOG_VALID_BLOCK(logBlks[0]))
    {
        READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
        SET_DEBUG_ID(iomsg, 0);
        buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
        if (buf == NULL)
        {
            return zFAILURE;
        }
        node = (FmapNode_s *)buf->pBuf.data;
        zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
                (node->head.magic == FMAP_BT_LEAF) ||
                (node->head.magic == FMAP_BT_ROOT));
        if (!((node->head.magic == FMAP_BT_BRANCH) ||
              (node->head.magic == FMAP_BT_LEAF) ||
              (node->head.magic == FMAP_BT_ROOT)))
        {
            SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
            ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
            CACHE_RELEASE(buf);
            return zFAILURE;
        }
        if (!(ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass)))
        {
            node->head.state |= BT_ROOT;
            node->head.magic = FMAP_BT_ROOT;
            node->head.fnh_internalID = logRecord->u.grow.fg_internalID;
            node->head.fnh_zid = logRecord->u.grow.fg_zid;
            ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
            CACHE_DIRTY_RELEASE(buf);
        }
        else
        {
            CACHE_RELEASE(buf);
        }
    }

    /** discard the root block that the grow had created **/
    if (ZLOG_VALID_BLOCK(logBlks[1]))
    {
        READBLK_IO_MSG(iomsg, pool, logBlks[1].blkNum, CACHE_WRITE)
        SET_DEBUG_ID(iomsg, 0);
        buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
        if (buf != NULL)
        {
            cacheReleaseToss(buf);
        }
    }
    return zOK;
}
/*
 * redoFmapSplit
 *
 * Recovery "redo" handler for a file-map node split.  Up to three logged
 * blocks are replayed in order:
 *   block 0 - the child that was split,
 *   block 1 - the parent that receives the extra entry,
 *   block 2 - the new sibling, rebuilt entirely from the log record.
 * Blocks 0 and 1 are validated by magic and skipped when ZLOG_ALREADY_DONE
 * reports the change as applied; block 2 is always rewritten when valid.
 *
 * Returns zOK on success, zFAILURE when a block cannot be read or block 0
 * or 1 does not carry a file-map magic.
 */
STATUS redoFmapSplit(
GeneralMsg_s *genMsg,
ZfsPool_s *pool,
ZfsXasRecovery_s *logBuffer,
NINT pass)
{
FmapLog_s *logRecord;
BlockInfo_s *logBlks;
Buffer_s *buffer;
FmapNode_s *node;
IoMsg_s iomsg;
ASSERT_MPKNSS_LOCK();
logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);
/** update child **/
if (ZLOG_VALID_BLOCK(logBlks[0]))
{
READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
SET_DEBUG_ID(iomsg, 0);
if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
return zFAILURE;
}
node = (FmapNode_s *)buffer->pBuf.data;
zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
(node->head.magic == FMAP_BT_LEAF) ||
(node->head.magic == FMAP_BT_ROOT));
/* Same check as the assert, enforced at run time as well. */
if ((node->head.magic != FMAP_BT_BRANCH) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_ROOT))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buffer, &iomsg);
CACHE_RELEASE(buffer);
buffer = NULL;
return zFAILURE;
}
if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
{
CACHE_RELEASE(buffer);
}
else
{
if (node->head.state & BT_LEAF)
{
/* A leaf always gets the logged child link. */
node->head.leafLink = logRecord->u.split.childLink;
if (logRecord->u.split.numRecs > 3)
{
/* This is the case where we are splitting a leaf that
 * has a sparse block and it is in the middle as opposed
 * to being on the extreme right.
 * When we split a leaf on the extreme right the numRecs
 * in child does not change, only 2 or 3 records are added
 * to the sibling. This is because we are doing an insert
 * with the split and that is why we have to do this slimy
 * check of numRecs > 3. But, if the leaf is in the
 * middle, then
 * when we split, we copy half the data from child to
 * sibling, and in that case split.numRecs is always > 3
 * and we need to fix up the child's numRecs as well as
 * zero out the entries that were copied over
 */
node->head.numRecs = node->head.numRecs -
logRecord->u.split.numRecs + 1;
bzero(&node->extent[node->head.numRecs],
(sizeof(FmapExt_s) * (FMAP_MAX - node->head.numRecs)));
}
}
else
{
/* Non-leaf child: always drop the records that moved to the sibling
 * and zero the slots from the new record count up to FMAP_MAX. */
node->head.numRecs = node->head.numRecs -
logRecord->u.split.numRecs + 1;
bzero(&node->extent[node->head.numRecs],
(sizeof(FmapExt_s) * (FMAP_MAX - node->head.numRecs)));
}
ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
CACHE_DIRTY_RELEASE(buffer);
}
}
/** update parent **/
if (ZLOG_VALID_BLOCK(logBlks[1]))
{
READBLK_IO_MSG(iomsg, pool, logBlks[1].blkNum, CACHE_UPDATE)
SET_DEBUG_ID(iomsg, 0);
if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
return zFAILURE;
}
node = (FmapNode_s *)buffer->pBuf.data;
zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
(node->head.magic == FMAP_BT_LEAF) ||
(node->head.magic == FMAP_BT_ROOT));
if ((node->head.magic != FMAP_BT_BRANCH) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_ROOT))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buffer, &iomsg);
CACHE_RELEASE(buffer);
buffer = NULL;
return zFAILURE;
}
if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
{
CACHE_RELEASE(buffer);
}
else
{
/* Open one slot after pIndex by sliding the following entries right,
 * then overwrite entries pIndex and pIndex + 1 with the two logged
 * parent extents. */
memmove(&node->extent[logRecord->u.split.pIndex + 2],
&node->extent[logRecord->u.split.pIndex + 1],
sizeof(FmapExt_s) * (node->head.numRecs -
(logRecord->u.split.pIndex + 1)));
memcpy(&node->extent[logRecord->u.split.pIndex],
&logRecord->u.split.parentExt[0], 2 * sizeof(FmapExt_s));
node->head.numRecs++;
ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[1], pass);
CACHE_DIRTY_RELEASE(buffer);
}
}
/** update sibling **/
if (ZLOG_VALID_BLOCK(logBlks[2]))
{
READBLK_IO_MSG(iomsg, pool, logBlks[2].blkNum, CACHE_UPDATE)
SET_DEBUG_ID(iomsg, 0);
if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
return zFAILURE;
}
/* The sibling is rebuilt from an all-zero block; no magic or
 * already-done check is made here. */
bzero(buffer->pBuf.data, (1 << buffer->bufSizeShift));
node = (FmapNode_s *)buffer->pBuf.data;
node->head.magic = logRecord->u.split.childMagic;
node->head.fnh_internalID = logRecord->u.split.fs_internalID;
node->head.fnh_zid = logRecord->u.split.fs_zid;
node->head.state = logRecord->u.split.childState;
node->head.leafLink = logRecord->u.split.sibLink;
memcpy(&node->extent[0], &logRecord->u.split.data[0],
logRecord->u.split.numRecs * sizeof(FmapExt_s));
node->head.numRecs = logRecord->u.split.numRecs;
ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[2], pass);
CACHE_DIRTY_RELEASE(buffer);
}
return zOK;
}
/*
 * undoFmapSplit
 *
 * Recovery "undo" handler for a file-map node split.  Up to three logged
 * blocks are processed in order:
 *   block 0 - the child: records that moved to the sibling are copied back
 *             from the log record,
 *   block 1 - the parent: the entry added by the split is removed,
 *   block 2 - the sibling: obtained in CACHE_WRITE mode and tossed.
 * Blocks 0 and 1 are validated by magic and skipped when ZLOG_ALREADY_DONE
 * reports the work as done.
 *
 * Returns zOK on success, zFAILURE when block 0 or 1 cannot be read or
 * does not carry a file-map magic.
 */
STATUS undoFmapSplit(
GeneralMsg_s *genMsg,
ZfsPool_s *pool,
ZfsXasRecovery_s *logBuffer,
NINT pass)
{
FmapLog_s *logRecord;
BlockInfo_s *logBlks;
Buffer_s *buffer;
FmapNode_s *node;
IoMsg_s iomsg;
ASSERT_MPKNSS_LOCK();
logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);
/** undo child **/
if (ZLOG_VALID_BLOCK(logBlks[0]))
{
READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
SET_DEBUG_ID(iomsg, 0);
if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
return zFAILURE;
}
node = (FmapNode_s *)buffer->pBuf.data;
zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
(node->head.magic == FMAP_BT_LEAF) ||
(node->head.magic == FMAP_BT_ROOT));
/* Same check as the assert, enforced at run time as well. */
if ((node->head.magic != FMAP_BT_BRANCH) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_ROOT))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buffer, &iomsg);
CACHE_RELEASE(buffer);
buffer = NULL;
return zFAILURE;
}
if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
{
CACHE_RELEASE(buffer);
}
else
{
if (node->head.state & BT_LEAF)
{
/* A leaf gets the logged sibling link back as its leaf link. */
node->head.leafLink = logRecord->u.split.sibLink;
if (logRecord->u.split.numRecs > 3)
{
/* This is the case where we are undoing a split of a leaf
 * that has a sparse block and it is in the middle as
 * opposed to being on the extreme right.
 * When we split a leaf on the extreme right the numRecs
 * in child does not change, only 2 or 3 records are added
 * to the sibling. But, if the leaf is in the middle, then
 * when we split, we copy half the data from child to
 * sibling, and in that case split.numRecs is always > 3
 */
memcpy(&node->extent[node->head.numRecs - 1],
&logRecord->u.split.data[0],
sizeof(FmapExt_s) * logRecord->u.split.numRecs);
node->head.numRecs = node->head.numRecs +
logRecord->u.split.numRecs - 1;
}
}
else
{
/* Non-leaf child: the logged records are always copied back, starting
 * at the child's current last record, which is overwritten. */
memcpy(&node->extent[node->head.numRecs - 1],
&logRecord->u.split.data[0],
sizeof(FmapExt_s) * logRecord->u.split.numRecs);
node->head.numRecs = node->head.numRecs +
logRecord->u.split.numRecs - 1;
}
ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
CACHE_DIRTY_RELEASE(buffer);
}
}
/** undo parent **/
if (ZLOG_VALID_BLOCK(logBlks[1]))
{
READBLK_IO_MSG(iomsg, pool, logBlks[1].blkNum, CACHE_UPDATE)
SET_DEBUG_ID(iomsg, 0);
if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
return zFAILURE;
}
node = (FmapNode_s *)buffer->pBuf.data;
zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
(node->head.magic == FMAP_BT_LEAF) ||
(node->head.magic == FMAP_BT_ROOT));
if ((node->head.magic != FMAP_BT_BRANCH) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_ROOT))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buffer, &iomsg);
CACHE_RELEASE(buffer);
buffer = NULL;
return zFAILURE;
}
if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
{
CACHE_RELEASE(buffer);
}
else
{
/* Entry pIndex takes over the count of entry pIndex + 1, then entry
 * pIndex + 1 is removed by sliding the following entries left and
 * zeroing the slot freed at the end. */
node->extent[logRecord->u.split.pIndex].count =
node->extent[logRecord->u.split.pIndex + 1].count;
memmove(&node->extent[logRecord->u.split.pIndex + 1],
&node->extent[logRecord->u.split.pIndex + 2],
sizeof(FmapExt_s) * (node->head.numRecs -
(logRecord->u.split.pIndex + 2)));
node->head.numRecs--;
node->extent[node->head.numRecs].count = 0;
node->extent[node->head.numRecs].poolBlk = 0;
ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[1], pass);
CACHE_DIRTY_RELEASE(buffer);
}
}
/** undo sibling **/
if (ZLOG_VALID_BLOCK(logBlks[2]))
{
READBLK_IO_MSG(iomsg, pool, logBlks[2].blkNum, CACHE_WRITE)
SET_DEBUG_ID(iomsg, 0);
/* A failure to obtain the sibling buffer is ignored. */
if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) != NULL)
{
cacheReleaseToss(buffer);
}
}
return zOK;
}
/*
 * redoFmapRemove
 *
 * Recovery "redo" handler for removing records from the end of a file-map
 * node.
 *
 * The logged block is read for update and its magic validated.  Unless
 * ZLOG_ALREADY_DONE reports the change as applied, the current last extent
 * is overwritten with the logged finalExt and the record count is then
 * reduced by the logged numRecs.  The node LSN is stamped and the buffer
 * released dirty.
 *
 * Returns zOK on success or when nothing had to be done, zFAILURE when the
 * block cannot be read or its magic is not a file-map magic.
 */
STATUS redoFmapRemove(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    FmapLog_s *logRecord;
    BlockInfo_s *logBlks;
    Buffer_s *buf;
    FmapNode_s *node;
    IoMsg_s iomsg;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
    logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

    if (!(ZLOG_VALID_BLOCK(logBlks[0])))
    {
        return zOK;
    }

    READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
    SET_DEBUG_ID(iomsg, 0);
    buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
    if (buf == NULL)
    {
        return zFAILURE;
    }

    node = (FmapNode_s *)buf->pBuf.data;
    zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
            (node->head.magic == FMAP_BT_LEAF) ||
            (node->head.magic == FMAP_BT_ROOT));
    if (!((node->head.magic == FMAP_BT_BRANCH) ||
          (node->head.magic == FMAP_BT_LEAF) ||
          (node->head.magic == FMAP_BT_ROOT)))
    {
        SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
        ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
        CACHE_RELEASE(buf);
        return zFAILURE;
    }

    if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
    {
        CACHE_RELEASE(buf);
        return zOK;
    }

    /* Rewrite the last extent first; the count is lowered afterwards. */
    node->extent[node->head.numRecs - 1].count =
        logRecord->u.remove.finalExt.count;
    node->extent[node->head.numRecs - 1].poolBlk =
        logRecord->u.remove.finalExt.poolBlk;
    node->head.numRecs -= logRecord->u.remove.numRecs;

    ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
    CACHE_DIRTY_RELEASE(buf);
    return zOK;
}
/*
 * undoFmapRemove
 *
 * Recovery "undo" handler for removing records from the end of a file-map
 * node.
 *
 * The logged block is read for update and its magic validated.  Unless
 * ZLOG_ALREADY_DONE reports the work as done, the record count is first
 * raised by the logged numRecs and the (new) last extent is then restored
 * from the logged origExt.  The node LSN is stamped and the buffer
 * released dirty.
 *
 * Returns zOK on success or when nothing had to be done, zFAILURE when the
 * block cannot be read or its magic is not a file-map magic.
 */
STATUS undoFmapRemove(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    FmapLog_s *logRecord;
    BlockInfo_s *logBlks;
    Buffer_s *buf;
    FmapNode_s *node;
    IoMsg_s iomsg;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
    logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

    if (!(ZLOG_VALID_BLOCK(logBlks[0])))
    {
        return zOK;
    }

    READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
    SET_DEBUG_ID(iomsg, 0);
    buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
    if (buf == NULL)
    {
        return zFAILURE;
    }

    node = (FmapNode_s *)buf->pBuf.data;
    zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
            (node->head.magic == FMAP_BT_LEAF) ||
            (node->head.magic == FMAP_BT_ROOT));
    if (!((node->head.magic == FMAP_BT_BRANCH) ||
          (node->head.magic == FMAP_BT_LEAF) ||
          (node->head.magic == FMAP_BT_ROOT)))
    {
        SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
        ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
        CACHE_RELEASE(buf);
        return zFAILURE;
    }

    if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
    {
        CACHE_RELEASE(buf);
        return zOK;
    }

    /* Restore the record count first; the last extent is located with it. */
    node->head.numRecs += logRecord->u.remove.numRecs;
    node->extent[node->head.numRecs - 1].count =
        logRecord->u.remove.origExt.count;
    node->extent[node->head.numRecs - 1].poolBlk =
        logRecord->u.remove.origExt.poolBlk;

    ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
    CACHE_DIRTY_RELEASE(buf);
    return zOK;
}
/*
 * redoFmapJoin
 *
 * Recovery "redo" handler for a file-map join.
 *
 * Block 0 (the child): obtained in CACHE_WRITE mode and handed to
 * cacheReleaseToss(); a failure to obtain it is ignored.
 *
 * Block 1 (the parent): read for update and validated by magic.  Unless
 * ZLOG_ALREADY_DONE reports the change as applied, the last extent is
 * cleared, the record count is reduced by one, and the count of the new
 * last extent is set to MAX_FILE_BLK.
 *
 * This handler needs nothing from the log record body, so it only walks
 * the logged block list.
 *
 * Returns zOK on success, zFAILURE when the parent cannot be read or does
 * not carry a file-map magic (errno is set to zERR_MEDIA_CORRUPTED).
 */
STATUS redoFmapJoin(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    BlockInfo_s *logBlks;
    Buffer_s *buffer;
    FmapNode_s *node;
    IoMsg_s iomsg;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);

    /** child **/
    if (ZLOG_VALID_BLOCK(logBlks[0]))
    {
        READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_WRITE)
        SET_DEBUG_ID(iomsg, 0);
        if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) != NULL)
        {
            cacheReleaseToss(buffer);
        }
    }

    /** parent **/
    if (ZLOG_VALID_BLOCK(logBlks[1]))
    {
        READBLK_IO_MSG(iomsg, pool, logBlks[1].blkNum, CACHE_UPDATE)
        SET_DEBUG_ID(iomsg, 0);
        if ((buffer = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
        {
            return zFAILURE;
        }
        node = (FmapNode_s *)buffer->pBuf.data;
        zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
                (node->head.magic == FMAP_BT_LEAF) ||
                (node->head.magic == FMAP_BT_ROOT));
        if ((node->head.magic != FMAP_BT_BRANCH) &&
            (node->head.magic != FMAP_BT_LEAF) &&
            (node->head.magic != FMAP_BT_ROOT))
        {
            SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
            ZLSSPOOL_MediaIsCorrupt(genMsg, buffer, &iomsg);
            CACHE_RELEASE(buffer);
            buffer = NULL;
            return zFAILURE;
        }
        if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
        {
            CACHE_RELEASE(buffer);
        }
        else
        {
            /* Drop the last entry, then extend the new last entry. */
            node->extent[node->head.numRecs - 1].poolBlk = 0;
            node->extent[node->head.numRecs - 1].count = 0;
            node->head.numRecs--;
            node->extent[node->head.numRecs - 1].count = MAX_FILE_BLK;
            ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[1], pass);
            CACHE_DIRTY_RELEASE(buffer);
        }
    }
    return zOK;
}
/*
 * undoFmapJoin
 *
 * Recovery "undo" handler for a file-map join.
 *
 * Block 0 (the child): read for update, cleared, and rebuilt from the log
 * record: first extent, record count, magic, IDs and state.  No magic or
 * already-done check is made for this block.
 *
 * Block 1 (the parent): read for update and validated by magic.  Unless
 * ZLOG_ALREADY_DONE reports the work as done, the two logged parent
 * extents are written starting at the current last record and the record
 * count is increased by one.
 *
 * Returns zOK on success, zFAILURE when a block cannot be read or the
 * parent does not carry a file-map magic.
 */
STATUS undoFmapJoin(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    FmapLog_s *logRecord;
    BlockInfo_s *logBlks;
    Buffer_s *buf;
    FmapNode_s *node;
    IoMsg_s iomsg;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
    logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

    /* child: recreate the block that the join had tossed */
    if (ZLOG_VALID_BLOCK(logBlks[0]))
    {
        READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
        SET_DEBUG_ID(iomsg, 0);
        buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
        if (buf == NULL)
        {
            return zFAILURE;
        }
        bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
        node = (FmapNode_s *)buf->pBuf.data;
        node->head.magic = logRecord->u.join.childMagic;
        node->head.state = logRecord->u.join.childState;
        node->head.numRecs = logRecord->u.join.childNumRecs;
        node->head.fnh_internalID = logRecord->u.join.fj_internalID;
        node->head.fnh_zid = logRecord->u.join.fj_zid;
        node->extent[0].poolBlk = logRecord->u.join.childExt.poolBlk;
        node->extent[0].count = logRecord->u.join.childExt.count;
        ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
        CACHE_DIRTY_RELEASE(buf);
    }

    /* parent: put back the entry that the join had removed */
    if (ZLOG_VALID_BLOCK(logBlks[1]))
    {
        READBLK_IO_MSG(iomsg, pool, logBlks[1].blkNum, CACHE_UPDATE)
        SET_DEBUG_ID(iomsg, 0);
        buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
        if (buf == NULL)
        {
            return zFAILURE;
        }
        node = (FmapNode_s *)buf->pBuf.data;
        zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
                (node->head.magic == FMAP_BT_LEAF) ||
                (node->head.magic == FMAP_BT_ROOT));
        if (!((node->head.magic == FMAP_BT_BRANCH) ||
              (node->head.magic == FMAP_BT_LEAF) ||
              (node->head.magic == FMAP_BT_ROOT)))
        {
            SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
            ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
            CACHE_RELEASE(buf);
            return zFAILURE;
        }
        if (!(ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass)))
        {
            memmove(&node->extent[node->head.numRecs - 1],
                    &logRecord->u.join.parentExt[0],
                    2 * sizeof(FmapExt_s));
            node->head.numRecs++;
            ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[1], pass);
            CACHE_DIRTY_RELEASE(buf);
        }
        else
        {
            CACHE_RELEASE(buf);
        }
    }
    return zOK;
}
/*
 * redoFmapToss
 *
 * Recovery "redo" handler for a file-map toss record.
 *
 * When the logged record count is non-zero, the logged block is obtained
 * in CACHE_WRITE mode and handed to cacheReleaseToss() (a failure to
 * obtain it is ignored).
 *
 * When the logged record count is zero, the block is read for update,
 * validated by magic and, unless ZLOG_ALREADY_DONE reports the change as
 * applied, turned into a root: BT_ROOT is set, the magic becomes
 * FMAP_BT_ROOT, the IDs are taken from the log record and the leaf link
 * is reset to INVALID_BLK_ZERO.
 *
 * Returns zOK on success or when nothing had to be done, zFAILURE when the
 * block cannot be read for update or its magic is not a file-map magic.
 */
STATUS redoFmapToss(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    FmapLog_s *logRecord;
    BlockInfo_s *logBlks;
    Buffer_s *buf;
    FmapNode_s *node;
    IoMsg_s iomsg;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
    logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

    if (!(ZLOG_VALID_BLOCK(logBlks[0])))
    {
        return zOK;
    }

    if (logRecord->u.toss.numRecs != 0)
    {
        /* The block itself was tossed: discard it again. */
        READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_WRITE)
        SET_DEBUG_ID(iomsg, 0);
        buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
        if (buf != NULL)
        {
            cacheReleaseToss(buf);
        }
        return zOK;
    }

    /* Record count of zero: the block survives and becomes the root. */
    READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
    SET_DEBUG_ID(iomsg, 0);
    buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
    if (buf == NULL)
    {
        return zFAILURE;
    }

    node = (FmapNode_s *)buf->pBuf.data;
    zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
            (node->head.magic == FMAP_BT_LEAF) ||
            (node->head.magic == FMAP_BT_ROOT));
    if (!((node->head.magic == FMAP_BT_BRANCH) ||
          (node->head.magic == FMAP_BT_LEAF) ||
          (node->head.magic == FMAP_BT_ROOT)))
    {
        SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
        ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
        CACHE_RELEASE(buf);
        return zFAILURE;
    }

    if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
    {
        CACHE_RELEASE(buf);
        return zOK;
    }

    node->head.state |= BT_ROOT;
    node->head.magic = FMAP_BT_ROOT;
    node->head.leafLink = INVALID_BLK_ZERO;
    node->head.fnh_internalID = logRecord->u.toss.ft_internalID;
    node->head.fnh_zid = logRecord->u.toss.ft_zid;
    ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
    CACHE_DIRTY_RELEASE(buf);
    return zOK;
}
/*
 * undoFmapToss
 *
 * Recovery "undo" handler for a file-map toss record.
 *
 * The logged block is read for update.  Two cases follow:
 *
 *  - logged record count non-zero (the original operation deleted the
 *    block): the block is cleared and rebuilt from the log record, with
 *    magic, IDs, state, extents and record count restored and the leaf
 *    link reset to INVALID_BLK_ZERO.  No already-done check is made.
 *
 *  - logged record count zero (not a block delete): unless
 *    ZLOG_ALREADY_DONE reports the work as done, only the header fields
 *    (magic, IDs, state, leaf link) are restored.
 *
 * Returns zOK on success or when nothing had to be done, zFAILURE when the
 * block cannot be read.
 */
STATUS undoFmapToss(
    GeneralMsg_s *genMsg,
    ZfsPool_s *pool,
    ZfsXasRecovery_s *logBuffer,
    NINT pass)
{
    FmapLog_s *logRecord;
    BlockInfo_s *logBlks;
    Buffer_s *buf;
    FmapNode_s *node;
    IoMsg_s iomsg;

    ASSERT_MPKNSS_LOCK();
    logBlks = ZLOG_START_OF_POOL_BLOCKS(logBuffer);
    logRecord = ZLOG_START_OF_LOG_RECORD(logBuffer);

    if (!(ZLOG_VALID_BLOCK(logBlks[0])))
    {
        return zOK;
    }

    READBLK_IO_MSG(iomsg, pool, logBlks[0].blkNum, CACHE_UPDATE)
    SET_DEBUG_ID(iomsg, 0);
    buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
    if (buf == NULL)
    {
        return zFAILURE;
    }
    node = (FmapNode_s *)buf->pBuf.data;

    if (logRecord->u.toss.numRecs != 0)
    {
        /* A non-zero record count means this originally was a block
         * delete: rebuild the whole node from the log record. */
        bzero(buf->pBuf.data, (1 << buf->bufSizeShift));
        node->head.magic = logRecord->u.toss.magic;
        node->head.state = logRecord->u.toss.state;
        node->head.leafLink = INVALID_BLK_ZERO;
        node->head.fnh_internalID = logRecord->u.toss.ft_internalID;
        node->head.fnh_zid = logRecord->u.toss.ft_zid;
        memcpy(&node->extent[0], &logRecord->u.toss.extent[0],
               logRecord->u.toss.numRecs * sizeof(FmapExt_s));
        node->head.numRecs = logRecord->u.toss.numRecs;
        ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
        CACHE_DIRTY_RELEASE(buf);
        return zOK;
    }

    /* Not a block delete: only the header needs to be put back. */
    if (ZLOG_ALREADY_DONE(pool, logBuffer, node->head.lsn, pass))
    {
        CACHE_RELEASE(buf);
        return zOK;
    }

    node->head.magic = logRecord->u.toss.magic;
    node->head.state = logRecord->u.toss.state;
    node->head.leafLink = INVALID_BLK_ZERO;
    node->head.fnh_internalID = logRecord->u.toss.ft_internalID;
    node->head.fnh_zid = logRecord->u.toss.ft_zid;
    ZLOG_SET_LSN(logBuffer, node->head.lsn, logBlks[0], pass);
    CACHE_DIRTY_RELEASE(buf);
    return zOK;
}
/*************************************************************************
* Shred Data
*************************************************************************/
/* Fill bytes for the shred passes: DataShredInit() fills pattern page i
 * with DataShredPatterns[i].  The trailing comments give each value in
 * binary. */
BYTE DataShredPatterns[MAX_DATA_SHRED_PATTERNS] =
{
0x35, /* 00110101 */
0xCA, /* 11001010 */
0x97, /* 10010111 */
0x68, /* 01101000 */
0x55, /* 01010101 */
0xAA, /* 10101010 */
0xFF /* 11111111 */
};
/* One pre-filled page per pattern; a slot is NULL until DataShredInit()
 * allocates it.  DataShred() writes pass i from DataShredPatPage[i]. */
struct page *DataShredPatPage[MAX_DATA_SHRED_PATTERNS];
/* Upper bound, in pool blocks, on the length handed to a single
 * zlssBioIOPages() call in DataShred(). */
NINT DataShredChunk = DATA_SHREDDING_CHUNK;
/*
 * DataShredInit
 *
 * Makes sure the first patCount shred pattern pages exist.  For every slot
 * of DataShredPatPage[] below patCount that is still NULL, a page is
 * allocated and filled with the matching byte from DataShredPatterns[].
 * Slots that are already populated are left untouched.
 *
 * The NSS lock is dropped around alloc_page() and re-taken afterwards.
 *
 * patCount - number of patterns required; must not exceed
 *            MAX_DATA_SHRED_PATTERNS.
 *
 * Returns zOK when all requested pages are available, zFAILURE when
 * patCount is too large or a page allocation fails.
 */
STATUS DataShredInit(
    LONG patCount)
{
    NINT i;
    BYTE *data;

    if ( patCount > MAX_DATA_SHRED_PATTERNS)
    {
        return zFAILURE;
    }
    for (i = 0; i < patCount; i++)
    {
        if (DataShredPatPage[i] == NULL)
        {
            MPKNSS_UNLOCK();
            DataShredPatPage[i] = alloc_page(GFP_USER);
            MPKNSS_LOCK();
            if (DataShredPatPage[i] == NULL)
            {
                return zFAILURE;
            }
            data = kmap_atomic(DataShredPatPage[i], KM_USER0);
            memset( data, DataShredPatterns[i], 1 << PAGE_SHIFT);
            /* kunmap_atomic() takes the address returned by
             * kmap_atomic(), not the struct page pointer. */
            kunmap_atomic(data, KM_USER0);
        }
    }
    return zOK;
}
/*
 * DataShredCleanup
 *
 * Frees every allocated shred pattern page and resets its slot in
 * DataShredPatPage[] to NULL, so that a later DataShredInit() allocates
 * fresh pages instead of reusing pointers to freed ones.
 *
 * The NSS lock is dropped around __free_page() and re-taken afterwards.
 * The slot is cleared before the lock is dropped, so the array never
 * holds a pointer to a page that is being freed.
 */
void DataShredCleanup()
{
    NINT i;
    struct page *page;

    for (i = 0; i < MAX_DATA_SHRED_PATTERNS; i++)
    {
        page = DataShredPatPage[i];
        if (page != NULL)
        {
            DataShredPatPage[i] = NULL;
            MPKNSS_UNLOCK();
            __free_page(page);
            MPKNSS_LOCK();
        }
    }
}
/*
 * DataShred
 *
 * Overwrites the pool blocks described by 'extent' once per configured
 * shred pass.  Pass N writes the pattern page DataShredPatPage[N] over the
 * whole extent, in pieces of at most DataShredChunk, and then calls
 * ZLSSDoBarrierWriteIfRequired() before the next pass starts.
 *
 * zfsVol - volume whose ZFSVOLshredCount gives the number of passes and
 *          whose storage pool supplies the device to write to.
 * extent - starting pool block (poolBlkNum) and length (lengthOfExtent)
 *          of the region to overwrite.
 *
 * Nothing is returned; the function returns without writing when the
 * volume has no physical pool.
 */
void DataShred(
	ZfsVolume_s *zfsVol,
	Extent_s *extent)
{
	zConPool_s	*phypool = zfsVol->pool->storagepool->phypool;
	Blknum_t	writeBlk;
	Blknum_t	remaining;
	Blknum_t	chunk;
	NINT		pass;

	zASSERT(phypool != NULL);
	zASSERT(zfsVol->ZFSVOLshredCount > 0);
	zASSERT(zfsVol->ZFSVOLshredCount <= MAX_DATA_SHRED_PATTERNS);
	if (phypool == NULL)
	{
		return ;
	}

	for (pass = 0; pass < zfsVol->ZFSVOLshredCount; pass++)
	{
		/* Every pass starts again from the front of the extent. */
		writeBlk = extent->poolBlkNum;
		remaining = extent->lengthOfExtent;
		chunk = DataShredChunk;

		while (remaining > 0)
		{
			/* The final piece may be shorter than a full chunk. */
			if (chunk > remaining)
			{
				chunk = remaining;
			}
			zlssBioIOPages(WRITE, phypool->ZCP_dev, DataShredPatPage[pass],
					writeBlk, chunk, phypool->pol.poolblocksize);
			remaining -= chunk;
			writeBlk += chunk;
			PERIODIC_YIELD();
		}

		/* Can block and release NSS spinlock */
		ZLSSDoBarrierWriteIfRequired( phypool );
	}
	return;
}
/*************************************************************************
* Truncate File
*************************************************************************/
/* Forward declarations of the b-tree truncation routines that are
 * defined further down in this file. */
NINT truncBranchNodeChild(
GeneralMsg_s *genMsg,
RootBeast_s *beast,
Buffer_s *buf,
Buffer_s *parentBuf,
Blknum_t fileBlk);
STATUS btreeFileMapTrunc(
GeneralMsg_s *genMsg,
RootBeast_s *beast,
Buffer_s *buf,
Blknum_t fileBlk);
/*
 * truncLeafNode
 *
 * Truncates the file-map node held in 'buf' so that nothing is mapped at
 * or beyond file block 'fileBlk'.  The node is processed from its last
 * extent record backwards; after every change control returns to the
 * ContinueProcessingLeaf label and the (new) last record is re-examined.
 *
 * Four situations are handled on each pass:
 *   1. numRecs <= 1       - no removable records remain.  A root node is
 *                           logged as XFUNC_FMAP_TOSS and the tree is
 *                           dropped (fmap->root = INVALID_BLK_ZERO).  Any
 *                           other node is logged as XFUNC_FMAP_JOIN and its
 *                           record is removed from the parent; processing
 *                           then moves to the parent's new last child, or
 *                           TRUNC_CONTINUE is returned when the parent has
 *                           at most one record left.
 *   2. last count <= fileBlk
 *                         - truncation is finished; a trailing sparse
 *                           record (poolBlk == 0) is stripped first.
 *   3. previous count >= fileBlk
 *                         - the whole last extent lies past the truncation
 *                           point: it is optionally shredded, logged as
 *                           XFUNC_FMAP_REMOVE, removed and freed.
 *   4. otherwise          - the last extent straddles fileBlk and is
 *                           shortened (or removed if it is a hole).
 *
 * genMsg    - request context used for I/O and error reporting.
 * beast     - file whose map is being truncated.
 * buf       - buffer holding the node to truncate.  It is released on
 *             every return path visible here.
 * parentBuf - buffer holding buf's parent.  It is dereferenced without a
 *             NULL check when the node is not a root, and released on
 *             every return path except the root case, where it is not
 *             touched (presumably NULL for a root -- confirm with callers).
 * fileBlk   - first file block that must no longer be mapped.
 *
 * Returns TRUNC_DONE when truncation is complete, TRUNC_CONTINUE when the
 * caller must restart from the tree root, and TRUNC_ERROR on failure.
 * NOTE(review): several failure paths return zFAILURE instead of
 * TRUNC_ERROR; the callers in this file treat any value other than
 * TRUNC_CONTINUE/TRUNC_DONE as failure, but confirm the two constants
 * cannot collide with zFAILURE.
 */
NINT truncLeafNode(
GeneralMsg_s *genMsg,
RootBeast_s *beast,
Buffer_s *buf,
Buffer_s *parentBuf,
Blknum_t fileBlk)
{
ZFSStorageInfo_s *stInfo = beast->storage.zfsInfo;
Fmap_s *fmap = &stInfo->fmap;
FmapNode_s *node;
FmapNode_s *parent;
Blknum_t poolBlk;
Extent_s extent;
IoMsg_s iomsg;
ZfsXaction_s *xaction;
ZfsXasRecovery_s *logBuffer;
BlockInfo_s *logBlks;
FmapLog_s *logRecord;
Lsn_t lsn;
/* Counts whole-extent removals; when it reaches zero the buffers are
 * released, the thread yields, and the buffers are read back. */
SNINT periodicReleaseCount = PERIODIC_RELEASE_COUNT;
ASSERT_MPKNSS_LOCK();
node = (FmapNode_s *)(buf->pBuf.data);
zASSERT((node->head.magic == FMAP_BT_BRANCH) ||
(node->head.magic == FMAP_BT_LEAF) ||
(node->head.magic == FMAP_BT_ROOT));
ContinueProcessingLeaf:
/* ---- Case 1: nothing removable is left in this node ---- */
if (node->head.numRecs <= 1)
{
node->head.leafLink = INVALID_BLK_ZERO;
if (node->head.state & BT_ROOT)
{
/* The node is the tree root: log the block as tossed/deleted and
 * drop the b-tree from the file map altogether. */
xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
sizeof(FmapToss_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_TOSS, xaction, logBuffer, 1,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
lsn = logBuffer->ZXR_Lsn;
logRecord->u.toss.extent[0].poolBlk = node->extent[0].poolBlk;
logRecord->u.toss.extent[0].count = node->extent[0].count;
logRecord->u.toss.numRecs = 1;
logRecord->u.toss.magic = node->head.magic;
logRecord->u.toss.state = node->head.state;
zASSERT( node->head.fnh_zid != 0 );
logRecord->u.toss.ft_internalID = node->head.fnh_internalID;
logRecord->u.toss.ft_zid = node->head.fnh_zid;
ZLOG_DELETE_BLOCK(xaction, logBlks[0]);
ZLOG_TEST_REDO(xaction);
ZLOG_ReleaseRecord(xaction);
node->head.lsn = lsn;
CACHE_RELEASE(buf);
zASSERT(stInfo->fmapTreeBlks > 0);
stInfo->fmapTreeBlks--;
fmap->root = INVALID_BLK_ZERO;
/* With the tree gone, end of file is the end of the direct map. */
stInfo->nextBlk = fmap->dirExt[fmap->numRecs -1].count;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
if (COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction) != zOK)
{
EndXlocal(xaction);
return zFAILURE;
}
EndXlocal(xaction);
/* Falls through to the final "return TRUNC_DONE". */
}
else
{
/* Non-root node: log a join that deletes this block and removes
 * its record (the last one) from the parent. */
parent = (FmapNode_s *)(parentBuf->pBuf.data);
xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(2) +
sizeof(FmapJoin_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_JOIN, xaction, logBuffer, 2,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[1], parentBuf->volBlk,
parent->head.lsn, parentBuf, xaction, 1);
lsn = logBuffer->ZXR_Lsn;
/* Save the parent's last two records in the log record. */
memcpy(&logRecord->u.join.parentExt[0],
&parent->extent[parent->head.numRecs -2],
2 * sizeof(FmapExt_s));
logRecord->u.join.childExt.poolBlk = node->extent[0].poolBlk;
logRecord->u.join.childExt.count = node->extent[0].count;
logRecord->u.join.childState = node->head.state;
logRecord->u.join.childMagic = node->head.magic;
logRecord->u.join.childNumRecs = node->head.numRecs;
zASSERT( node->head.fnh_zid != 0 );
logRecord->u.join.fj_internalID = node->head.fnh_internalID;
logRecord->u.join.fj_zid = node->head.fnh_zid;
ZLOG_DELETE_BLOCK(xaction, logBlks[0]);
ZLOG_BIND(xaction, parentBuf);
ZLOG_TEST_REDO(xaction);
ZLOG_ReleaseRecord(xaction);
node->head.lsn = lsn;
parent->head.lsn = lsn;
/* Drop the parent's last record (the one that pointed at this node). */
parent->extent[parent->head.numRecs -1].count = 0;
parent->extent[parent->head.numRecs -1].poolBlk = 0;
parent->head.numRecs--;
if (parent->head.numRecs > 1)
{
/* The parent still has children: its new last record is given
 * the count MAX_FILE_BLK and that child becomes the node to
 * process next. */
poolBlk = parent->extent[parent->head.numRecs -1].poolBlk;
parent->extent[parent->head.numRecs -1].count = MAX_FILE_BLK;
parentBuf->state |= CACHE_DIRTY;
CACHE_RELEASE(buf);
zASSERT(stInfo->fmapTreeBlks > 0);
stInfo->fmapTreeBlks--;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
buf = NULL;
READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_UPDATE);
SET_DEBUG_ID(iomsg, 22);
if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
CACHE_RELEASE(parentBuf);
EndXlocal(xaction);
return TRUNC_ERROR;
}
node = (FmapNode_s *)(buf->pBuf.data);
zASSERT((node->head.magic == FMAP_BT_ROOT) ||
(node->head.magic == FMAP_BT_LEAF) ||
(node->head.magic == FMAP_BT_BRANCH));
/* Same check as the assert above, enforced at run time: an
 * unknown magic is reported as media corruption. */
if ((node->head.magic != FMAP_BT_BRANCH) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_ROOT))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
buf = NULL;
CACHE_RELEASE(parentBuf);
EndXlocal(xaction);
return TRUNC_ERROR;
}
EndXlocal(xaction);
goto ContinueProcessingLeaf;
}
else
{
/* The parent has at most one record left; release both buffers
 * and let the caller restart from the root. */
CACHE_DIRTY_RELEASE(parentBuf);
parentBuf = NULL;
CACHE_RELEASE(buf);
zASSERT(stInfo->fmapTreeBlks > 0);
stInfo->fmapTreeBlks--;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
EndXlocal(xaction);
return TRUNC_CONTINUE;
}
}
}
/* ---- Case 2: the last extent already ends at or before fileBlk ---- */
else if (node->extent[node->head.numRecs -1].count <= fileBlk)
{
if (node->extent[node->head.numRecs -1].poolBlk == 0)
{
/** The last entry is a sparse entry -- remove it and
** further truncate the file **/
/* No log record is written for this change in this branch. */
fileBlk = node->extent[node->head.numRecs -2].count;
node->extent[node->head.numRecs -1].count = 0;
node->head.numRecs--;
goto ContinueProcessingLeaf;
}
if (parentBuf)
{
CACHE_RELEASE(parentBuf);
}
node->head.leafLink = INVALID_BLK_ZERO;
CACHE_DIRTY_RELEASE(buf);
/* Record the new end of file in the beast and force it out. */
xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
stInfo->nextBlk = fileBlk;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
if (COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction) != zOK)
{
EndXlocal(xaction);
return zFAILURE;
}
EndXlocal(xaction);
/* Falls through to the final "return TRUNC_DONE". */
}
/* ---- Case 3: the whole last extent lies at or beyond fileBlk ---- */
else if (node->extent[node->head.numRecs -2].count >= fileBlk)
{
poolBlk = node->extent[node->head.numRecs -1].poolBlk;
/* Shred the data before the extent is logged and freed; holes
 * (poolBlk == 0) have no data to shred. */
if (DATA_SHREDDING_ENABLED(beast->vol.zfsVol) && (poolBlk != 0))
{
extent.poolBlkNum = poolBlk;
extent.lengthOfExtent = node->extent[node->head.numRecs -1].count -
node->extent[node->head.numRecs -2].count;
DataShred(beast->vol.zfsVol, &extent);
}
xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
sizeof(FmapRemove_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_REMOVE, xaction, logBuffer, 1,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
lsn = logBuffer->ZXR_Lsn;
/* origExt = the record as it is now; finalExt zeroed and
 * numRecs = 1 because the record is removed entirely. */
logRecord->u.remove.origExt.poolBlk =
node->extent[node->head.numRecs -1].poolBlk;
logRecord->u.remove.origExt.count =
node->extent[node->head.numRecs -1].count;
logRecord->u.remove.finalExt.poolBlk = 0;
logRecord->u.remove.finalExt.count = 0;
logRecord->u.remove.numRecs = 1;
ZLOG_BIND(xaction, buf);
ZLOG_TEST_REDO(xaction);
ZLOG_ReleaseRecord(xaction);
node->head.lsn = lsn;
if (poolBlk == 0)
{
/* Hole in file.. nothing to free */
node->extent[node->head.numRecs -1].count = 0;
node->head.numRecs--;
}
else
{
/* Compute the extent to free before the record is cleared. */
extent.poolBlkNum = poolBlk;
extent.lengthOfExtent = node->extent[node->head.numRecs -1].count -
node->extent[node->head.numRecs -2].count;
node->extent[node->head.numRecs -1].count = 0;
node->extent[node->head.numRecs -1].poolBlk = 0;
node->head.numRecs--;
zASSERT(stInfo->fmapDataBlks >= extent.lengthOfExtent);
/* Give the space back to the user and directory accounting. */
VOL_AdjustUsedUserSpace(&xaction->xaction, beast,
-((SQUAD)extent.lengthOfExtent << beast->ROOTblkSizeShift));
DIRQ_AdjustUsedDirSpace(&xaction->xaction,
beast->ROOTvolume, beast,
-((SQUAD)extent.lengthOfExtent << beast->ROOTblkSizeShift));
stInfo->fmapDataBlks-=extent.lengthOfExtent;
stInfo->nextBlk = node->extent[node->head.numRecs -1].count;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
/** FixFixFix6 Free should be done before logging or changing
** metadata
**/
if (zfsFreeExtent(genMsg, beast->vol.zfsVol,
&extent, xaction) != zOK)
{
if (parentBuf) CACHE_RELEASE(parentBuf);
node->head.leafLink = INVALID_BLK_ZERO;
CACHE_DIRTY_RELEASE(buf);
COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction);
EndXlocal(xaction);
/* NOTE(review): "%d" assumes lengthOfExtent is int-sized --
 * confirm against the Blknum_t definition. */
aprintf(LRED, "Got an error from free tree. Potentially lost %d blocks\n", extent.lengthOfExtent);
return zFAILURE;
}
}
buf->state |= CACHE_DIRTY;
EndXlocal(xaction);
/* Every PERIODIC_RELEASE_COUNT removals, release both buffers,
 * yield, and read them back by block number. */
if (--periodicReleaseCount <= 0)
{
Blknum_t bufBlk = buf->volBlk;
periodicReleaseCount = PERIODIC_RELEASE_COUNT;
CACHE_RELEASE(buf);
buf = NULL;
if (parentBuf)
{
Blknum_t pBufBlk = parentBuf->volBlk;
CACHE_RELEASE(parentBuf);
READBLK_IO_MSG(iomsg, beast, pBufBlk, CACHE_UPDATE);
SET_DEBUG_ID(iomsg, 24);
parentBuf = NULL;
if ((parentBuf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
/* buf was already released above, so nothing is held here. */
// CACHE_RELEASE(buf);
return zFAILURE;
}
/* I am just releasing and re reading it back it, so not
* checking the magic numbers
*/
}
Yield();
READBLK_IO_MSG(iomsg, beast, bufBlk, CACHE_UPDATE);
SET_DEBUG_ID(iomsg, 23);
if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
if (parentBuf)
{
CACHE_RELEASE(parentBuf);
}
return zFAILURE;
}
/* The buffer may be a different one now; refresh the node pointer. */
node = (FmapNode_s *)(buf->pBuf.data);
}
goto ContinueProcessingLeaf;
}
/* ---- Case 4: the last extent straddles fileBlk ---- */
else
{
if (node->extent[node->head.numRecs -1].poolBlk == 0)
{
/* A hole is removed as a whole, so the truncation point moves
 * back to the end of the previous record. */
poolBlk = 0;
fileBlk = node->extent[node->head.numRecs -2].count;
}
else
{
/* First pool block of the tail that is being cut off. */
poolBlk = node->extent[node->head.numRecs -1].poolBlk +
(fileBlk - node->extent[node->head.numRecs -2].count);
}
if (DATA_SHREDDING_ENABLED(beast->vol.zfsVol) && (poolBlk != 0))
{
extent.poolBlkNum = poolBlk;
extent.lengthOfExtent =
node->extent[node->head.numRecs -1].count - fileBlk;
DataShred(beast->vol.zfsVol, &extent);
}
xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
sizeof(FmapRemove_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_REMOVE, xaction, logBuffer, 1,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
lsn = logBuffer->ZXR_Lsn;
logRecord->u.remove.origExt.poolBlk =
node->extent[node->head.numRecs -1].poolBlk;
logRecord->u.remove.origExt.count =
node->extent[node->head.numRecs -1].count;
if (poolBlk == 0)
{
/* Hole: logged as a full removal of the record. */
logRecord->u.remove.finalExt.poolBlk = 0;
logRecord->u.remove.finalExt.count = 0;
logRecord->u.remove.numRecs = 1;
}
else
{
/* Data extent: the record stays, with its count cut to fileBlk. */
logRecord->u.remove.finalExt.poolBlk =
node->extent[node->head.numRecs -1].poolBlk;
logRecord->u.remove.finalExt.count = fileBlk;
logRecord->u.remove.numRecs = 0;
}
ZLOG_BIND(xaction, buf);
ZLOG_TEST_REDO(xaction);
ZLOG_ReleaseRecord(xaction);
node->head.lsn = lsn;
if (poolBlk == 0)
{
/* Hole in file.. nothing to free */
node->extent[node->head.numRecs -1].count = 0;
node->head.numRecs--;
}
else
{
/* Free only the tail [fileBlk, old count) of the extent. */
extent.poolBlkNum = poolBlk;
extent.lengthOfExtent =
node->extent[node->head.numRecs -1].count - fileBlk;
node->extent[node->head.numRecs -1].count = fileBlk;
zASSERT(stInfo->fmapDataBlks >= extent.lengthOfExtent);
VOL_AdjustUsedUserSpace(&xaction->xaction, beast,
-((SQUAD)extent.lengthOfExtent << beast->ROOTblkSizeShift));
DIRQ_AdjustUsedDirSpace(&xaction->xaction,
beast->ROOTvolume, beast,
-((SQUAD)extent.lengthOfExtent << beast->ROOTblkSizeShift));
stInfo->fmapDataBlks-=extent.lengthOfExtent;
stInfo->nextBlk = node->extent[node->head.numRecs -1].count;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
/** FixFixFix6 Free should be done before logging or changing
** metadata
**/
if (zfsFreeExtent(genMsg, beast->vol.zfsVol,
&extent, xaction) != zOK)
{
if (parentBuf) CACHE_RELEASE(parentBuf);
node->head.leafLink = INVALID_BLK_ZERO;
CACHE_DIRTY_RELEASE(buf);
COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction);
EndXlocal(xaction);
/* NOTE(review): "%d" assumes lengthOfExtent is int-sized --
 * confirm against the Blknum_t definition. */
aprintf(LRED, "Got an error from free tree. Potentially lost %d blocks\n", extent.lengthOfExtent);
return TRUNC_ERROR;
}
}
buf->state |= CACHE_DIRTY;
EndXlocal(xaction);
/* The next pass sees count <= fileBlk (or one record fewer) and
 * finishes through case 1 or case 2. */
goto ContinueProcessingLeaf;
}
return TRUNC_DONE;
}
/*
 * truncBranchNodeChild
 *
 * Walks down the right-hand edge of the file-map b-tree, starting at
 * 'buf' (a child of 'parentBuf'), until something can be truncated:
 *   - a leaf node is handed to truncLeafNode() and its result returned;
 *   - a branch with more than one record is descended through its last
 *     record (the old parent is released and the branch becomes the
 *     parent);
 *   - a branch with one record or fewer is deleted: an XFUNC_FMAP_JOIN
 *     record is logged, the parent's last record is removed, and the walk
 *     continues with the parent's new last child, or TRUNC_CONTINUE is
 *     returned when the parent has at most one record left.
 *
 * genMsg    - request context used for I/O and error reporting.
 * beast     - file whose map is being truncated.
 * buf       - buffer holding the child node to examine.
 * parentBuf - buffer holding buf's parent; dereferenced unconditionally,
 *             so it must not be NULL.
 * fileBlk   - truncation point, passed through to truncLeafNode().
 *
 * Both buffers are released on every return path visible in this
 * function (the leaf path hands them to truncLeafNode()).
 *
 * Returns whatever truncLeafNode() returns for the leaf path, otherwise
 * TRUNC_CONTINUE or TRUNC_ERROR.
 */
NINT truncBranchNodeChild(
GeneralMsg_s *genMsg,
RootBeast_s *beast,
Buffer_s *buf,
Buffer_s *parentBuf,
Blknum_t fileBlk)
{
ZFSStorageInfo_s *stInfo = beast->storage.zfsInfo;
FmapNode_s *node;
FmapNode_s *parent;
Blknum_t poolBlk;
// Extent_s extent;
IoMsg_s iomsg;
ZfsXaction_s *xaction;
ZfsXasRecovery_s *logBuffer;
BlockInfo_s *logBlks;
FmapLog_s *logRecord;
Lsn_t lsn;
ASSERT_MPKNSS_LOCK();
ContinueDownTheTree:
/* Re-derive both node pointers on every pass; buf/parentBuf change as
 * the walk moves down or sideways. */
node = (FmapNode_s *)(buf->pBuf.data);
zASSERT((node->head.magic == FMAP_BT_ROOT) ||
(node->head.magic == FMAP_BT_LEAF) ||
(node->head.magic == FMAP_BT_BRANCH));
parent = (FmapNode_s *)(parentBuf->pBuf.data);
zASSERT((parent->head.magic == FMAP_BT_ROOT) ||
(parent->head.magic == FMAP_BT_LEAF) ||
(parent->head.magic == FMAP_BT_BRANCH));
if (node->head.state & BT_LEAF)
{
/* Reached a leaf: the leaf routine takes over both buffers. */
return truncLeafNode(genMsg, beast, buf, parentBuf, fileBlk);
}
else if (node->head.numRecs > 1)
{
/* Branch with children: step down through its last record.  The
 * old parent is no longer needed; this node becomes the parent. */
poolBlk = node->extent[node->head.numRecs -1].poolBlk;
CACHE_RELEASE(parentBuf);
parentBuf = buf;
buf = NULL;
READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_UPDATE);
SET_DEBUG_ID(iomsg, 25);
if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
CACHE_RELEASE(parentBuf);
return TRUNC_ERROR;
}
node = (FmapNode_s *)(buf->pBuf.data);
/* An unknown magic is reported as media corruption. */
if ((node->head.magic != FMAP_BT_BRANCH) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_ROOT))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
CACHE_RELEASE(parentBuf);
buf = NULL;
parentBuf = NULL;
return TRUNC_ERROR;
}
goto ContinueDownTheTree;
}
else
{
/* Branch with no remaining children: log a join that deletes this
 * block and removes its record (the last one) from the parent. */
xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(2) +
sizeof(FmapJoin_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_JOIN, xaction, logBuffer, 2,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[1], parentBuf->volBlk,
parent->head.lsn, parentBuf, xaction, 1);
lsn = logBuffer->ZXR_Lsn;
/* Save the parent's last two records in the log record. */
memcpy(&logRecord->u.join.parentExt[0],
&parent->extent[parent->head.numRecs -2],
2 * sizeof(FmapExt_s));
logRecord->u.join.childExt.poolBlk = node->extent[0].poolBlk;
logRecord->u.join.childExt.count = node->extent[0].count;
logRecord->u.join.childState = node->head.state;
logRecord->u.join.childMagic = node->head.magic;
logRecord->u.join.childNumRecs = node->head.numRecs;
zASSERT( node->head.fnh_zid != 0 );
logRecord->u.join.fj_internalID = node->head.fnh_internalID;
logRecord->u.join.fj_zid = node->head.fnh_zid;
ZLOG_DELETE_BLOCK(xaction, logBlks[0]);
ZLOG_BIND(xaction, parentBuf);
ZLOG_TEST_REDO(xaction);
ZLOG_ReleaseRecord(xaction);
node->head.lsn = lsn;
parent->head.lsn = lsn;
/* Drop the parent's last record (the one that pointed at this node). */
parent->extent[parent->head.numRecs -1].count = 0;
parent->extent[parent->head.numRecs -1].poolBlk = 0;
parent->head.numRecs--;
if (parent->head.numRecs > 1)
{
/* The parent still has children: its new last record is given
 * the count MAX_FILE_BLK and that child is examined next. */
poolBlk = parent->extent[parent->head.numRecs -1].poolBlk;
parent->extent[parent->head.numRecs -1].count = MAX_FILE_BLK;
parentBuf->state |= CACHE_DIRTY;
CACHE_RELEASE(buf);
zASSERT(stInfo->fmapTreeBlks > 0);
stInfo->fmapTreeBlks--;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
buf = NULL;
READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_UPDATE);
SET_DEBUG_ID(iomsg, 26);
if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
CACHE_RELEASE(parentBuf);
EndXlocal(xaction);
return TRUNC_ERROR;
}
node = (FmapNode_s *)(buf->pBuf.data);
if ((node->head.magic != FMAP_BT_BRANCH) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_ROOT))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
CACHE_RELEASE(parentBuf);
buf = NULL;
parentBuf = NULL;
EndXlocal(xaction);
return TRUNC_ERROR;
}
EndXlocal(xaction);
goto ContinueDownTheTree;
}
else
{
/* The parent has at most one record left; release both buffers
 * and let the caller restart from the root. */
CACHE_DIRTY_RELEASE(parentBuf);
parentBuf = NULL;
CACHE_RELEASE(buf);
zASSERT(stInfo->fmapTreeBlks > 0);
stInfo->fmapTreeBlks--;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
EndXlocal(xaction);
return TRUNC_CONTINUE;
}
}
/* Not reached: every branch above returns or jumps back to the label. */
return TRUNC_DONE;
}
/*
 * btreeFileMapTrunc
 *
 * Drives truncation of the b-tree part of a file map, starting from the
 * root node held in 'buf'.  Each pass through ContinueTraversingTheTree
 * looks at the current root:
 *   - root is also a leaf   -> truncLeafNode() does the work;
 *   - root has 1 record     -> no children remain: the root block is
 *                              logged as XFUNC_FMAP_TOSS and the tree is
 *                              dropped (fmap->root = INVALID_BLK_ZERO);
 *   - root has 2 records    -> the tree is shrunk by one level: the root
 *                              is tossed and its only child is promoted
 *                              to be the new root;
 *   - otherwise             -> truncBranchNodeChild() is called on the
 *                              root's last child.
 * When a helper returns TRUNC_CONTINUE the root is re-read from
 * fmap->root and the loop starts over.
 *
 * genMsg  - request context used for I/O and error reporting.
 * beast   - file whose map is being truncated.
 * buf     - buffer holding the root node (magic FMAP_BT_ROOT is asserted).
 *           It is released, directly or by a helper, on every return path
 *           visible here.
 * fileBlk - first file block that must no longer be mapped.
 *
 * Returns zOK on success, zFAILURE otherwise.
 */
STATUS btreeFileMapTrunc(
GeneralMsg_s *genMsg,
RootBeast_s *beast,
Buffer_s *buf,
Blknum_t fileBlk)
{
ZFSStorageInfo_s *stInfo = beast->storage.zfsInfo;
Fmap_s *fmap = &stInfo->fmap;
// Extent_s extent;
/* NULL whenever control is at the label below: it is only set just
 * before truncBranchNodeChild(), which takes the buffer over. */
Buffer_s *parentBuf = NULL;
FmapNode_s *node = (FmapNode_s *)buf->pBuf.data;
Blknum_t poolBlk;
IoMsg_s iomsg;
NINT ccode;
ZfsXaction_s *xaction;
ZfsXasRecovery_s *logBuffer;
BlockInfo_s *logBlks;
FmapLog_s *logRecord;
Lsn_t lsn;
ASSERT_MPKNSS_LOCK();
ContinueTraversingTheTree:
zASSERT(node->head.magic == FMAP_BT_ROOT);
zASSERT(node->head.state & BT_ROOT);
if (node->head.state & BT_LEAF)
{
/* Single-level tree: the root holds the extents itself. */
ccode = truncLeafNode(genMsg, beast, buf, parentBuf, fileBlk);
if (ccode == TRUNC_CONTINUE)
{
/* Restart from the root as currently recorded in the beast. */
buf = NULL;
READBLK_IO_MSG(iomsg, beast, fmap->root, CACHE_UPDATE);
SET_DEBUG_ID(iomsg, 27);
if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
return zFAILURE;
}
parentBuf = NULL;
node = (FmapNode_s *)buf->pBuf.data;
if (node->head.magic != FMAP_BT_ROOT)
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
buf = NULL;
return zFAILURE;
}
goto ContinueTraversingTheTree;
}
else if (ccode == TRUNC_DONE)
{
return zOK;
}
else
{
/* TRUNC_ERROR or any other value is treated as failure. */
return zFAILURE;
}
}
else
{
if (node->head.numRecs == 1)
{
/** this is the end .. All my leaves are gone **/
xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
sizeof(FmapToss_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_TOSS, xaction, logBuffer, 1,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
lsn = logBuffer->ZXR_Lsn;
logRecord->u.toss.extent[0].poolBlk = node->extent[0].poolBlk;
logRecord->u.toss.extent[0].count = node->extent[0].count;
logRecord->u.toss.numRecs = 1;
logRecord->u.toss.magic = node->head.magic;
logRecord->u.toss.state = node->head.state;
zASSERT( node->head.fnh_zid != 0 );
logRecord->u.toss.ft_internalID = node->head.fnh_internalID;
logRecord->u.toss.ft_zid = node->head.fnh_zid;
ZLOG_DELETE_BLOCK(xaction, logBlks[0]);
ZLOG_TEST_REDO(xaction);
ZLOG_ReleaseRecord(xaction);
node->head.lsn = lsn;
/* The file map no longer has a tree; end of file becomes the end
 * of the direct map. */
fmap->root = INVALID_BLK_ZERO;
stInfo->nextBlk = fmap->dirExt[fmap->numRecs -1].count;
CACHE_RELEASE(buf);
zASSERT(stInfo->fmapTreeBlks > 0);
stInfo->fmapTreeBlks--;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
if (COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction) != zOK)
{
if (parentBuf)
{
CACHE_RELEASE(parentBuf);
}
EndXlocal(xaction);
return zFAILURE;
}
EndXlocal(xaction);
/* Falls through to the final "return zOK". */
}
else if (node->head.numRecs == 2)
{
/** Root Blk has only 1 entry (other one is std first entry), so we
** can shrink the B-tree **/
/* Step 1: log and delete the current root block. */
xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
sizeof(FmapToss_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_TOSS, xaction, logBuffer, 1,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
lsn = logBuffer->ZXR_Lsn;
logRecord->u.toss.extent[0].poolBlk = node->extent[0].poolBlk;
logRecord->u.toss.extent[0].count = node->extent[0].count;
logRecord->u.toss.extent[1].poolBlk = node->extent[1].poolBlk;
logRecord->u.toss.extent[1].count = node->extent[1].count;
logRecord->u.toss.numRecs = 2;
logRecord->u.toss.magic = node->head.magic;
logRecord->u.toss.state = node->head.state;
zASSERT( node->head.fnh_zid != 0 );
logRecord->u.toss.ft_internalID = node->head.fnh_internalID;
logRecord->u.toss.ft_zid = node->head.fnh_zid;
ZLOG_DELETE_BLOCK(xaction, logBlks[0]);
ZLOG_TEST_REDO(xaction);
ZLOG_ReleaseRecord(xaction);
node->head.lsn = lsn;
/* Step 2: point the file map at the root's only child. */
fmap->root = node->extent[node->head.numRecs -1].poolBlk;
CACHE_RELEASE(buf);
zASSERT(stInfo->fmapTreeBlks > 0);
stInfo->fmapTreeBlks--;
COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
if (COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction) != zOK)
{
EndXlocal(xaction);
return zFAILURE;
}
/* Step 3: read the child and turn it into the new root, inside
 * the same transaction. */
buf = NULL;
READBLK_IO_MSG(iomsg, beast, fmap->root, CACHE_UPDATE);
SET_DEBUG_ID(iomsg, 28);
if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
EndXlocal(xaction);
return zFAILURE;
}
node = (FmapNode_s *)buf->pBuf.data;
/* The child has not been promoted yet, so it must still carry a
 * leaf or branch magic. */
if ((node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_BRANCH))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
buf = NULL;
EndXlocal(xaction);
return zFAILURE;
}
/* Second TOSS record, with numRecs == 0 and no block delete; it
 * captures the child's magic/state before they are changed below. */
ZLOG_ObtainRecord(xaction, ZLOG_BLOCK_INFO_SIZE(1) +
sizeof(FmapToss_s));
ZLOG_INIT_LOG_RECORD(XFUNC_FMAP_TOSS, xaction, logBuffer, 1,
logBlks, logRecord);
ZLOG_ASSIGN_BLOCK_INFO(logBlks[0], buf->volBlk,
node->head.lsn, buf, xaction, 0);
lsn = logBuffer->ZXR_Lsn;
logRecord->u.toss.numRecs = 0;
logRecord->u.toss.magic = node->head.magic;
logRecord->u.toss.state = node->head.state;
zASSERT( node->head.fnh_zid != 0 );
logRecord->u.toss.ft_internalID = node->head.fnh_internalID;
logRecord->u.toss.ft_zid = node->head.fnh_zid;
ZLOG_TEST_REDO(xaction);
ZLOG_BIND(xaction, buf);
ZLOG_ReleaseRecord(xaction);
node->head.lsn = lsn;
node->head.state |= BT_ROOT;
node->head.magic = FMAP_BT_ROOT; // Waited because log record
// hold the old MAGIC/type of
// this node.
buf->state |= CACHE_DIRTY;
EndXlocal(xaction);
goto ContinueTraversingTheTree;
}
else
{
/* Root has several children: work on the last one, with the root
 * acting as its parent. */
poolBlk = node->extent[node->head.numRecs -1].poolBlk;
parentBuf = buf;
buf = NULL;
READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_UPDATE);
SET_DEBUG_ID(iomsg, 29);
if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
CACHE_RELEASE(parentBuf);
return zFAILURE;
}
node = (FmapNode_s *)buf->pBuf.data;
if ((node->head.magic != FMAP_BT_ROOT) &&
(node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_BRANCH))
{
CACHE_RELEASE(parentBuf);
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
buf = NULL;
return zFAILURE;
}
/* The helper takes over both buffers. */
ccode = truncBranchNodeChild(genMsg,beast,buf,parentBuf,fileBlk);
if (ccode == TRUNC_CONTINUE)
{
/* Restart from the root as currently recorded in the beast. */
buf = NULL;
READBLK_IO_MSG(iomsg, beast, fmap->root, CACHE_UPDATE);
SET_DEBUG_ID(iomsg, 30);
if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
{
return zFAILURE;
}
parentBuf = NULL;
node = (FmapNode_s *)buf->pBuf.data;
if (node->head.magic != FMAP_BT_ROOT)
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
buf = NULL;
return zFAILURE;
}
goto ContinueTraversingTheTree;
}
else if (ccode == TRUNC_DONE)
{
return zOK;
}
else
{
/* TRUNC_ERROR or any other value is treated as failure. */
return zFAILURE;
}
}
}
return zOK;
}
/*
 * directFileMapTrunc
 *
 * Truncates the direct (in-beast) extent map so that nothing is mapped at
 * or beyond file block 'fileBlk'.  Extents are handled from the last
 * record backwards: an extent lying entirely past fileBlk is freed and
 * its record removed, an extent straddling fileBlk has only its tail
 * freed and its count cut back to fileBlk.
 *
 * When data shredding is enabled for the volume, a first pass overwrites
 * every region that is about to be freed before any metadata is changed.
 *
 * genMsg  - request context used for error reporting.
 * beast   - file whose direct map is being truncated.
 * fileBlk - first file block that must no longer be mapped.
 *
 * Returns zOK on success.  Returns a failure status if an extent could
 * not be freed or if the beast could not be written.  A free failure is
 * no longer masked by a successful beast write: the blocks released up to
 * that point are still accounted for and the beast is still written, but
 * the caller is told the truncation did not complete.
 */
STATUS directFileMapTrunc(
	GeneralMsg_s *genMsg,
	RootBeast_s *beast,
	Blknum_t fileBlk)
{
	STATUS status = zOK;
	STATUS writeStatus;
	ZFSStorageInfo_s *stInfo = beast->storage.zfsInfo;
	Fmap_s *fmap = &stInfo->fmap;
	Extent_s extent;
	ZfsXaction_s *xaction;
	Blknum_t length;
	SQUAD totalLength = 0;
	NINT numRecs;

	ASSERT_MPKNSS_LOCK();

	/* Only the first record present: nothing to truncate. */
	if (fmap->numRecs <= 1)
	{
		return status;
	}

	/* Pass 1 (optional): shred everything that pass 2 will free.  A local
	 * record counter is used so the map itself is left untouched. */
	if (DATA_SHREDDING_ENABLED(beast->vol.zfsVol))
	{
		numRecs = fmap->numRecs;
		while(fmap->dirExt[numRecs -1].count > fileBlk)
		{
			if (fmap->dirExt[numRecs -2].count >= fileBlk)
			{
				/* Whole extent lies past the truncation point. */
				extent.poolBlkNum = fmap->dirExt[numRecs - 1].poolBlk;
				extent.lengthOfExtent = fmap->dirExt[numRecs -1].count -
						fmap->dirExt[numRecs -2].count;
			}
			else
			{
				/* Only the tail starting at fileBlk goes away. */
				extent.poolBlkNum = fmap->dirExt[numRecs - 1].poolBlk +
						(fileBlk - fmap->dirExt[numRecs -2].count);
				extent.lengthOfExtent = fmap->dirExt[numRecs -1].count -
						fileBlk;
			}
			DataShred(beast->vol.zfsVol, &extent);
			numRecs--;
			zASSERT(numRecs != 0);
		}
	}

	/* Pass 2: free the extents and update the map under one transaction. */
	xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
	while(fmap->dirExt[fmap->numRecs -1].count > fileBlk)
	{
		if (fmap->dirExt[fmap->numRecs -2].count >= fileBlk)
		{
			extent.poolBlkNum = fmap->dirExt[fmap->numRecs - 1].poolBlk;
			extent.lengthOfExtent = fmap->dirExt[fmap->numRecs -1].count -
					fmap->dirExt[fmap->numRecs -2].count;
			zASSERT(stInfo->fmapDataBlks >= extent.lengthOfExtent);
			length = extent.lengthOfExtent;
			if (zfsFreeExtent(genMsg, beast->vol.zfsVol,
					&extent, xaction) != zOK)
			{
				status = zFAILURE;
				break;
			}
			/* Remove the record only after the free succeeded. */
			fmap->dirExt[fmap->numRecs -1].count = 0;
			fmap->dirExt[fmap->numRecs -1].poolBlk = 0;
			fmap->numRecs--;
		}
		else
		{
			extent.poolBlkNum = fmap->dirExt[fmap->numRecs - 1].poolBlk +
					(fileBlk - fmap->dirExt[fmap->numRecs -2].count);
			extent.lengthOfExtent = fmap->dirExt[fmap->numRecs -1].count -
					fileBlk;
			zASSERT(stInfo->fmapDataBlks >= extent.lengthOfExtent);
			length = extent.lengthOfExtent;
			if (zfsFreeExtent(genMsg, beast->vol.zfsVol,
					&extent, xaction) != zOK)
			{
				status = zFAILURE;
				break;
			}
			fmap->dirExt[fmap->numRecs -1].count = fileBlk;
		}
		totalLength+=length;
		stInfo->fmapDataBlks-=length;
		stInfo->nextBlk = fmap->dirExt[fmap->numRecs -1].count;
	}

	/* Account for whatever was actually freed, even after a failure. */
	VOL_AdjustUsedUserSpace(&xaction->xaction, beast,
			-(totalLength << beast->ROOTblkSizeShift));
	DIRQ_AdjustUsedDirSpace(&xaction->xaction, beast->ROOTvolume, beast,
			-(totalLength << beast->ROOTblkSizeShift));
	COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);

	/* Keep an earlier zfsFreeExtent() failure: only a failing write may
	 * replace the status collected so far. */
	writeStatus = COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction);
	if (writeStatus != zOK)
	{
		status = writeStatus;
	}
	EndXlocal(xaction);
	return status;
}
/****************************************************************************
* Copy the file map to the snap shot file
*****************************************************************************/
/*
 * CopyTheFileMapToSnapBeast
 *
 * Hands the complete file map of 'beast' (nextBlk, data/tree block counts
 * and the Fmap_s itself) over to its file snapshot beast and leaves
 * 'beast' with an empty map.  The snapshot beast is written first and the
 * source beast second.
 *
 * genMsg  - request context used for error reporting.
 * xaction - transaction both beasts are marked and written under.
 * beast   - source file; beast->fileSnapshotBeast must be set and its
 *           beast latch held exclusively (asserted).
 *
 * Returns zOK when both beasts were written.  On failure zFAILURE is
 * returned and the in-memory state is rolled back: the map is put back
 * into 'beast' and the snapshot beast is emptied again, so the blocks do
 * not end up referenced by neither beast.  If the snapshot beast had
 * already been written, it is written once more in its emptied state
 * (best effort; that result is not checked).
 */
STATUS CopyTheFileMapToSnapBeast(
	GeneralMsg_s *genMsg,
	ZfsXaction_s *xaction,
	RootBeast_s *beast)
{
	RootBeast_s *snapBeast;
	int snapWritten = 0;	/* set once the snapshot beast reached disk */

	ASSERT_MPKNSS_LOCK();
	snapBeast = beast->fileSnapshotBeast;
	zASSERT(snapBeast != NULL);
	ASSERT_XLATCH(&snapBeast->ROOTbeastLatch);

	/* Move the map: copy everything to the snapshot beast ... */
	snapBeast->storage.zfsInfo->nextBlk =
			beast->storage.zfsInfo->nextBlk;
	snapBeast->storage.zfsInfo->fmapDataBlks =
			beast->storage.zfsInfo->fmapDataBlks;
	snapBeast->storage.zfsInfo->fmapTreeBlks =
			beast->storage.zfsInfo->fmapTreeBlks;
	memcpy(&snapBeast->storage.zfsInfo->fmap,
			&beast->storage.zfsInfo->fmap, sizeof(Fmap_s));

	/* ... and leave the source beast empty. */
	beast->storage.zfsInfo->nextBlk = 0;
	beast->storage.zfsInfo->fmapDataBlks = 0;
	beast->storage.zfsInfo->fmapTreeBlks = 0;
	bzero(&beast->storage.zfsInfo->fmap, sizeof(Fmap_s));

	COMN_MARK_BEAST_XLOCAL(snapBeast, &xaction->xaction);
	if (COMN_ForceBeastWrite(genMsg, snapBeast, &xaction->xaction) != zOK)
	{
		goto rollback;
	}
	snapWritten = 1;

	COMN_MARK_BEAST_XLOCAL(beast, &xaction->xaction);
	if (COMN_ForceBeastWrite(genMsg, beast, &xaction->xaction) == zOK)
	{
		return zOK;
	}

rollback:
	/* The snapshot beast still holds the only in-memory copy of the map;
	 * give it back to the source beast before wiping the snapshot. */
	beast->storage.zfsInfo->nextBlk =
			snapBeast->storage.zfsInfo->nextBlk;
	beast->storage.zfsInfo->fmapDataBlks =
			snapBeast->storage.zfsInfo->fmapDataBlks;
	beast->storage.zfsInfo->fmapTreeBlks =
			snapBeast->storage.zfsInfo->fmapTreeBlks;
	memcpy(&beast->storage.zfsInfo->fmap,
			&snapBeast->storage.zfsInfo->fmap, sizeof(Fmap_s));

	snapBeast->storage.zfsInfo->nextBlk = 0;
	snapBeast->storage.zfsInfo->fmapDataBlks = 0;
	snapBeast->storage.zfsInfo->fmapTreeBlks = 0;
	bzero(&snapBeast->storage.zfsInfo->fmap, sizeof(Fmap_s));

	if (snapWritten)
	{
		/* Undo the on-disk snapshot copy as well. */
		COMN_MARK_BEAST_XLOCAL(snapBeast, &xaction->xaction);
		COMN_ForceBeastWrite(genMsg, snapBeast, &xaction->xaction);
	}
	return zFAILURE;
}
/****************************************************************************
* Set up beginning of file truncation
*****************************************************************************/
/*
 * ZFSVOL_VOL_truncateFile
 *
 * Entry point for truncating a file's block map at file block 'fileBlk'.
 *
 * genMsg  - request context; its flags select the snapshot path and it
 *           receives the error code on failure.
 * beast   - file to truncate; its beast latch must be held exclusively
 *           and its volume must not be read-only (both asserted).
 * fileBlk - first file block that must no longer be mapped.
 * count   - must be -1 (asserted); not used otherwise.
 *
 * Snapshot path: when COPY_FMAP_TO_SNAP is requested on a volume with
 * zATTR_COW enabled, only truncation to block 0 is accepted.  The file's
 * used space is subtracted from the user/directory accounting and the
 * whole map is moved to the snapshot beast.  Any other fileBlk fails with
 * zERR_ACCESS_DENIED.
 *
 * Normal path: a truncation point at or past end of file, or a file with
 * no blocks, is a successful no-op.  Otherwise the b-tree part of the map
 * (if any) is truncated first, and the direct map is truncated when no
 * tree exists or the tree has been removed entirely.
 *
 * Returns zOK on success, zFAILURE otherwise.
 */
STATUS ZFSVOL_VOL_truncateFile (
	GeneralMsg_s *genMsg,
	RootBeast_s *beast,
	Blknum_t fileBlk,
	Blkcnt_t count)
{
	ZFSStorageInfo_s	*stInfo;
	Fmap_s				*fmap;
	Buffer_s			*rootBuf = NULL;
	IoMsg_s				iomsg;
	FmapNode_s			*rootNode;
	ZfsXaction_s		*xaction;
	STATUS				copyStatus;

	ASSERT_MPKNSS_LOCK();
	stInfo = beast->storage.zfsInfo;
	fmap = &stInfo->fmap;
	zASSERT(count == -1);
	ASSERT_XLATCH( &beast->ROOTbeastLatch);
	zASSERT(!(beast->ROOTvolume->VOLenabledAttributes & zATTR_READONLY));

	if ((genMsg->flags & COPY_FMAP_TO_SNAP) &&
		(beast->ROOTvolume->VOLenabledAttributes & zATTR_COW))
	{
		/* With snapshotting enabled, a partial truncate is not served
		 * here: the higher-level routine copies individual blocks to the
		 * snapshot beast before truncating the original file. */
		if (fileBlk != 0)
		{
			SetErrno(genMsg, zERR_ACCESS_DENIED);
			return zFAILURE;
		}

		/* Truncate to zero: the blocks move to the snapshot beast rather
		 * than being released, so the user/directory used-space counts
		 * are decremented here to stay correct. */
		xaction = BeginXLocal(beast->vol.volume,BXL_DEFAULT|X_CF_OK_TO_THROTTLE);
		VOL_AdjustUsedUserSpace(&xaction->xaction, beast,
				-((SQUAD)stInfo->fmapDataBlks << beast->ROOTblkSizeShift));
		DIRQ_AdjustUsedDirSpace(&xaction->xaction, beast->ROOTvolume, beast,
				-((SQUAD)stInfo->fmapDataBlks << beast->ROOTblkSizeShift));
		copyStatus = CopyTheFileMapToSnapBeast(genMsg, xaction, beast);
		EndXlocal(xaction);
		return copyStatus;
	}

	/* Truncation point at or beyond end of file: not treated as an error. */
	if (fileBlk >= stInfo->nextBlk)
	{
		return zOK;
	}

	/* File already has no blocks at all: not treated as an error either. */
	if ((fmap->numRecs <= 1) && (fmap->root == INVALID_BLK_ZERO))
	{
		return zOK;
	}

	/* Truncate the b-tree part of the map first, if there is one. */
	if (fmap->root)
	{
		READBLK_IO_MSG(iomsg, beast, fmap->root, CACHE_UPDATE);
		SET_DEBUG_ID(iomsg, 31);
		rootBuf = ZFS_ReadPoolBlk(genMsg, &iomsg);
		if (rootBuf == NULL)
		{
			return zFAILURE;
		}

		rootNode = (FmapNode_s *)rootBuf->pBuf.data;
		if (rootNode->head.magic != FMAP_BT_ROOT)
		{
			SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
			ZLSSPOOL_MediaIsCorrupt(genMsg, rootBuf, &iomsg);
			CACHE_RELEASE(rootBuf);
			return zFAILURE;
		}

		/* The tree routine takes over the root buffer. */
		if (btreeFileMapTrunc(genMsg, beast, rootBuf, fileBlk) != zOK)
		{
			return zFAILURE;
		}
	}

	/* No tree, or the tree was removed completely above: continue the
	 * truncation in the direct map. */
	if (fmap->root == INVALID_BLK_ZERO)
	{
		if (directFileMapTrunc(genMsg, beast, fileBlk) != zOK)
		{
			return zFAILURE;
		}
	}
	return zOK;
}
/************************************************************************
************************************************************************/
/***************************************************************************
*
* This returns an array of longs which contains the size (in blocks) of
* each logically allocated region of the file. There is also an entry
* for every hole in the file. For allocated extents, the entries are
* positive numbers indicating how many blocks are in the extent. For
* holes, the entries are negative numbers, the negation of which
* indicates how many blocks make up the hole.
***************************************************************************/
/*
 * ZFSVOL_VOL_getExtentList - fill extentList with run lengths (in blocks)
 * describing the file map of beast from fileBlk onward.
 *
 * genMsg         - receives the errno when this routine fails.
 * beast          - file whose map is walked; ASSERT_LATCH checks
 *                  beast->ROOTbeastLatch.
 * fileBlk        - first file block to describe.
 * extentListSize - number of entries extentList can hold.
 * extentList     - output: positive entries accumulate the block counts of
 *                  allocated records, negative entries are negated hole
 *                  sizes.
 * retNumExtents  - output: number of entries reported.
 * retNextBlock   - output: the file block at which the list stopped
 *                  (stInfo->nextBlk unless the list filled up first);
 *                  presumably where a follow-up call resumes - TODO confirm.
 *
 * Returns zOK on success.  Returns zFAILURE with zERR_END_OF_FILE when
 * fileBlk is at or past stInfo->nextBlk, with zERR_MEDIA_CORRUPTED when a
 * file map node carries an unexpected magic, or when ZFS_ReadPoolBlk
 * returns NULL.
 *
 * NOTE(review): entry 0 of extentList is written unconditionally, so an
 * extentListSize of 0 is not guarded against here - confirm with callers.
 */
STATUS ZFSVOL_VOL_getExtentList(
GeneralMsg_s *genMsg,
RootBeast_s *beast,
Blknum_t fileBlk,
NINT extentListSize,
Blknum_t (*extentList)[],
NINT *retNumExtents,
Blknum_t *retNextBlock)
{
Buffer_s *buf;
ZFSStorageInfo_s *stInfo;
Fmap_s *fmap;
Blknum_t poolBlk;
NINT i, index;
FmapNode_s *node;
Blknum_t seed;
Blknum_t retLen = 0;
IoMsg_s iomsg;
BOOL fileBlkInDirectMap = FALSE;
ASSERT_MPKNSS_LOCK();
ASSERT_LATCH( &beast->ROOTbeastLatch);
stInfo = beast->storage.zfsInfo;
fmap = &stInfo->fmap;
/* Defaults used when the walk reaches the end of the map. */
*retNumExtents = 0;
*retNextBlock = stInfo->nextBlk;
if (fileBlk >= stInfo->nextBlk)
{
SetErrno(genMsg, zERR_END_OF_FILE);
return zFAILURE;
}
/* Entry 0 starts out as an empty positive run that later code adds to. */
i = 0;
(*extentList)[i] = 0;
if (fileBlk < fmap->dirExt[fmap->numRecs - 1].count)
{
/*
 * The start block lies inside the direct map.  Everything from fileBlk to
 * the end of the direct map is reported as one allocated run, and the walk
 * continues at the first block past the direct map.
 * NOTE(review): seed and retLen are not read afterwards, and index is
 * overwritten by the tree search before it is used.
 */
searchDirectMap(fmap, fileBlk, &seed, &retLen, &index);
(*extentList)[i] += (fmap->dirExt[fmap->numRecs -1].count - fileBlk);
fileBlk = fmap->dirExt[fmap->numRecs -1].count;
*retNumExtents = 1;
fileBlkInDirectMap = TRUE;
}
/* Without a tree root the direct map result (if any) is the whole answer. */
if (fmap->root != 0)
{
READBLK_IO_MSG(iomsg, beast, fmap->root, CACHE_READ);
SET_DEBUG_ID(iomsg, 32);
buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
if (buf == NULL)
{
return zFAILURE;
}
node = (FmapNode_s *)buf->pBuf.data;
if (node->head.magic != FMAP_BT_ROOT)
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
buf = NULL;
return zFAILURE;
}
/* Descend from the root to the leaf chosen by searchBranch for fileBlk. */
while (!(node->head.state & BT_LEAF))
{
poolBlk = searchBranch(node, fileBlk, &index);
CACHE_RELEASE(buf);
buf = NULL;
READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_READ);
SET_DEBUG_ID(iomsg, 33);
buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
if (buf == NULL)
{
return zFAILURE;
}
node = (FmapNode_s *)buf->pBuf.data;
if ((node->head.magic != FMAP_BT_LEAF) &&
(node->head.magic != FMAP_BT_BRANCH))
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
buf = NULL;
return zFAILURE;
}
}
/* poolBlk == 0 is treated below as "fileBlk falls in a hole". */
poolBlk = searchLeaf(node, fileBlk, &retLen, &index);
/** If the first extent is in the filemap and is a hole,
** the direct map will not have added an entry and we need to
** do this extra check here, before we go into the for loop.
**/
if (!fileBlkInDirectMap)
{
/* Need to handle starting in the middle of an extent that
* has not been handled in the direct map code.
*/
if (poolBlk == 0)
{
/* Hole code: emit the rest of the hole as a negative entry. */
(*extentList)[i] = -(node->extent[index].count - fileBlk);
if (++i == extentListSize)
{
/* List is full; the hole's end is where the list stops. */
*retNextBlock = node->extent[index].count;
*retNumExtents = i;
goto returnValues;
}
(*extentList)[i] = 0;
}
else
{
/* Non-hole code: the rest of the record joins the open positive entry. */
(*extentList)[i] += (node->extent[index].count - fileBlk);
}
/* The record containing fileBlk is done; continue with the next one. */
index++;
}
/*
 * Walk the remaining records of this leaf and then the leaves reached
 * through head.leafLink.  Consecutive allocated records are summed into
 * the open positive entry.  A hole record closes that entry, is emitted as
 * a negative entry, and opens a new empty positive entry.
 */
while (1)
{
for (;index < node->head.numRecs; index++)
{
if (node->extent[index].poolBlk != 0)
{
(*extentList)[i] += (node->extent[index].count -
node->extent[index-1].count);
}
else
{
/* Close the open positive entry; stop before the hole if full. */
if (++i == extentListSize)
{
*retNextBlock = node->extent[index-1].count;
*retNumExtents = i;
goto returnValues;
}
(*extentList)[i] = -(node->extent[index].count -
node->extent[index-1].count);
/* The hole has been recorded; stop after it if the list is now full. */
if (++i == extentListSize)
{
*retNextBlock = node->extent[index].count;
*retNumExtents = i;
goto returnValues;
}
(*extentList)[i] = 0;
}
}
/* NOTE(review): presumably a scheduling/yield point between leaves - confirm. */
LB_delay(0);
poolBlk = node->head.leafLink;
/*
 * Finished when there is no next leaf or this leaf already reaches
 * stInfo->nextBlk.  The open positive entry is counted here even when
 * nothing was added to it, so the last reported entry may be 0.
 */
if ((poolBlk == INVALID_BLK_ZERO) ||
(node->extent[node->head.numRecs -1].count >= stInfo->nextBlk))
{
*retNextBlock = stInfo->nextBlk;
*retNumExtents = ++i;
goto returnValues;
}
CACHE_RELEASE(buf);
buf = NULL;
READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_READ);
SET_DEBUG_ID(iomsg, 34);
buf = ZFS_ReadPoolBlk(genMsg, &iomsg);
if (buf == NULL)
{
return zFAILURE;
}
node = (FmapNode_s *)buf->pBuf.data;
if (node->head.magic != FMAP_BT_LEAF)
{
SetErrno(genMsg, zERR_MEDIA_CORRUPTED);
ZLSSPOOL_MediaIsCorrupt(genMsg, buf, &iomsg);
CACHE_RELEASE(buf);
buf = NULL;
return zFAILURE;
}
/*
 * NOTE(review): a linked leaf is scanned from record 1; record 0 is
 * presumably a boundary record whose count is only used as the start of
 * record 1 - confirm against the split code.
 */
index = 1;
}
returnValues:
/* Drop the leaf that was current when the walk stopped. */
CACHE_RELEASE(buf);
buf = NULL;
}
return zOK;
}
/*
 * pool2physicalExtent - translate one file-map extent into device extents.
 *
 * The extent is taken to begin at file byte offset
 * msg->body.map.retEndingOffset.  Each piece reported by
 * ZFSMAL_PhysicalExtent is stored in the next free zPhysicalExtent_s slot
 * of the msg->sys.data[MAP_DATA] array; retEndingOffset and
 * retExtentListCount are advanced to match.
 *
 * A pool block number of 0 denotes a hole: only retEndingOffset moves and
 * no output slot is used.
 *
 * Returns zOK when the whole extent was handled.  Returns zFAILURE with
 * status zERR_FINISHED_WITH_EXTENTS when the output array is full (either
 * on entry, or after its last slot was filled while part of the extent is
 * still untranslated), or with the status returned by
 * ZFSMAL_PhysicalExtent when that call fails.
 */
STATUS pool2physicalExtent (
    zNSSMsg_s *msg,
    RootBeast_s *beast,
    Extent_s *extent)
{
    zPhysicalExtent_s *slots = (zPhysicalExtent_s *)(uintptr_t)msg->sys.data[MAP_DATA].start;
    zPhysicalExtent_s *slot;
    NINT capacity = msg->sys.data[MAP_DATA].length / sizeof(zPhysicalExtent_s);
    NINT used = msg->body.map.retExtentListCount;
    STATUS rc;
    ADDR devID;
    QUAD fileOff;
    QUAD poolOff;
    QUAD poolLen;
    QUAD devOff;
    QUAD devLen;

    if (used >= capacity)
    {
        SetStatus(msg, zERR_FINISHED_WITH_EXTENTS);
        return zFAILURE;
    }

    fileOff = msg->body.map.retEndingOffset;
    poolOff = ((QUAD)(LONG)extent->poolBlkNum) << beast->blkSizeShift;
    poolLen = ((QUAD)(LONG)extent->lengthOfExtent) << beast->blkSizeShift;

    if (poolOff == 0)
    {
        /*
         * Hole in the file: skip past it without changing the number of
         * extents being returned.
         */
        msg->body.map.retEndingOffset = fileOff + poolLen;
        return zOK;
    }

    do
    {
        rc = ZFSMAL_PhysicalExtent(beast->vol.zfsVol, poolOff, poolLen,
                                   &devID, &devOff, &devLen);
        if (rc != zOK)
        {
            SetStatus(msg, rc);
            return zFAILURE;
        }

        slot = &slots[used++];
        slot->logicalOffset = fileOff;
        slot->poolOffset = poolOff;
        slot->physical.offset = devOff;
        slot->physical.deviceID = devID;

        if (poolLen <= devLen) // Should we limit length by size of file?
        {
            /* The device piece covers the rest of the pool extent. */
            slot->length = poolLen;
            msg->body.map.retEndingOffset = fileOff + poolLen;
            msg->body.map.retExtentListCount = used;
            return zOK;
        }

        /* Only part was covered; advance past it and translate the rest. */
        slot->length = devLen;
        fileOff += devLen;
        poolOff += devLen;
        poolLen -= devLen;
    } while (used < capacity);

    /* Ran out of output slots with part of the extent still pending. */
    msg->body.map.retEndingOffset = fileOff;
    msg->body.map.retExtentListCount = used;
    SetStatus(msg, zERR_FINISHED_WITH_EXTENTS);
    return zFAILURE;
}
/*
 * ZFSVOL_doDirectPhysicalExtents - report the physical extents for the
 * part of the file that is mapped by the direct (in-beast) extent array.
 *
 * The starting block is derived from msg->body.map.retEndingOffset, which
 * the caller initialised to the requested offset.  Every direct record
 * that ends after that block is handed to pool2physicalExtent; the record
 * containing the starting block is trimmed so that it begins there.
 *
 * Returns zOK, or zFAILURE as soon as pool2physicalExtent fails.
 */
STATUS ZFSVOL_doDirectPhysicalExtents (
    zNSSMsg_s *msg,
    RootBeast_s *beast)
{
    ZFSStorageInfo_s *stInfo = beast->storage.zfsInfo;
    Fmap_s *fmap = &stInfo->fmap;
    Blknum_t startBlk;
    Blknum_t skip;
    Extent_s run;
    NINT rec;

    ASSERT_MPKNSS_LOCK();
    ASSERT_SLATCH( &beast->ROOTbeastLatch);

    startBlk = msg->body.map.retEndingOffset >> beast->blkSizeShift;

    for (rec = 1; rec < fmap->numRecs; rec++)
    {
        /* Records that end at or before the start block are skipped. */
        if (startBlk >= fmap->dirExt[rec].count)
        {
            continue;
        }

        run.poolBlkNum = fmap->dirExt[rec].poolBlk;
        run.lengthOfExtent = fmap->dirExt[rec].count -
                             fmap->dirExt[rec-1].count;

        /* Starting inside this record: drop the blocks before startBlk. */
        if (startBlk > fmap->dirExt[rec-1].count)
        {
            skip = startBlk - fmap->dirExt[rec-1].count;
            run.poolBlkNum += skip;
            run.lengthOfExtent -= skip;
        }

        if (pool2physicalExtent(msg, beast, &run) != zOK)
        {
            return zFAILURE;
        }
    }
    return zOK;
}
/*
 * descendFileMapTree - walk the file map tree of beast from its root down
 * to the leaf selected by searchBranch for fileBlk.
 *
 * Returns the still-held buffer of the first node whose state has BT_LEAF
 * set; the caller must release it.  Returns NULL when there is no root,
 * when searchBranch yields block 0, or when ZFS_ReadPoolBlk fails (the
 * errno lands in a local message and is not visible to the caller).
 */
Buffer_s *descendFileMapTree (RootBeast_s *beast, Blknum_t fileBlk)
{
    GeneralMsg_s genMsg;
    IoMsg_s iomsg;
    Buffer_s *buf;
    FmapNode_s *cur;
    Blknum_t nextBlk;
    NINT slot;

    COMN_SETUP_GENERAL_MSG_NO_CONNECTION_RESOLVE( &genMsg);

    nextBlk = beast->storage.zfsInfo->fmap.root;
    while (nextBlk != 0)
    {
        READBLK_IO_MSG(iomsg, beast, nextBlk, CACHE_READ);
        SET_DEBUG_ID(iomsg, 36);
        buf = ZFS_ReadPoolBlk( &genMsg, &iomsg);
        if (buf == NULL)
        {
            break;
        }

        cur = (FmapNode_s *)buf->pBuf.data;
        if (cur->head.state & BT_LEAF)
        {
            /* Hand the leaf to the caller without releasing it. */
            return buf;
        }

        /* Pick the child first, then let go of this branch node. */
        nextBlk = searchBranch(cur, fileBlk, &slot);
        CACHE_RELEASE(buf);
    }
    return NULL;
}
/*
 * scanFileMapLeaf - report physical extents for the records of one file
 * map leaf, starting with the record searchLeaf finds for the block at
 * msg->body.map.retEndingOffset.
 *
 * The first record uses the pool block and length returned by searchLeaf;
 * each later record uses its own pool block and the difference between
 * its count and the previous record's count.
 *
 * Returns zOK, or zFAILURE as soon as pool2physicalExtent fails.
 */
STATUS scanFileMapLeaf (
    zNSSMsg_s *msg,
    RootBeast_s *beast,
    FmapNode_s *node)
{
    NINT recCount = node->head.numRecs;
    NINT rec;
    Blknum_t startBlk;
    Extent_s cur;

    ASSERT_MPKNSS_LOCK();

    startBlk = msg->body.map.retEndingOffset >> beast->blkSizeShift;

    /* Locate the record that holds the starting block. */
    cur.poolBlkNum = searchLeaf(node, startBlk, &cur.lengthOfExtent, &rec);

    while (rec < recCount)
    {
        if (pool2physicalExtent(msg, beast, &cur) != zOK)
        {
            return zFAILURE;
        }

        /* Load the following record, if the leaf has one. */
        if (++rec < recCount)
        {
            cur.poolBlkNum = node->extent[rec].poolBlk;
            cur.lengthOfExtent = node->extent[rec].count -
                                 node->extent[rec-1].count;
        }
    }
    return zOK;
}
/*
 * ZFSVOL_doTreePhysicalExtents - report the physical extents for the part
 * of the file that is mapped by the file map tree.
 *
 * The tree is descended from stInfo->fmap.root to the leaf selected by
 * searchBranch for the block at msg->body.map.retEndingOffset.  That leaf
 * and the leaves linked after it (head.leafLink) are then scanned with
 * scanFileMapLeaf until there is no further leaf or the current leaf
 * already reaches stInfo->nextBlk.
 *
 * Returns zOK when the scan completed or when there is no tree to scan.
 * Returns zFAILURE when scanFileMapLeaf fails (status already set in msg)
 * or when a block read fails (status taken from the read's errno).
 *
 * The descent is done here rather than through descendFileMapTree because
 * that helper returns NULL both for "no tree" and for a failed read; a
 * failed read must not be reported as success with a truncated list.
 */
STATUS ZFSVOL_doTreePhysicalExtents (
    zNSSMsg_s *msg,
    RootBeast_s *beast)
{
    ZFSStorageInfo_s *stInfo = beast->storage.zfsInfo;
    GeneralMsg_s genMsg;
    IoMsg_s iomsg;
    FmapNode_s *node;
    Buffer_s *buf;
    Blknum_t fileBlk;
    Blknum_t poolBlk;
    NINT index;
    STATUS status;

    COMN_SETUP_GENERAL_MSG_NO_CONNECTION_RESOLVE( &genMsg);
    fileBlk = msg->body.map.retEndingOffset >> beast->blkSizeShift;

    /* Descend to the leaf that covers fileBlk. */
    poolBlk = stInfo->fmap.root;
    for (;;)
    {
        if (poolBlk == 0)
        {
            /* No tree (or no child for this block): nothing to report. */
            return zOK;
        }
        READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_READ);
        SET_DEBUG_ID(iomsg, 36);
        buf = ZFS_ReadPoolBlk( &genMsg, &iomsg);
        if (buf == NULL)
        {
            /* Propagate the read error instead of reporting success. */
            SetStatusFromErrno(msg, &genMsg);
            return zFAILURE;
        }
        node = (FmapNode_s *)buf->pBuf.data;
        if (node->head.state & BT_LEAF)
        {
            break;
        }
        poolBlk = searchBranch(node, fileBlk, &index);
        CACHE_RELEASE(buf);
    }

    /* Scan this leaf and the leaves chained after it. */
    for (;;)
    {
        node = (FmapNode_s *)buf->pBuf.data;
        status = scanFileMapLeaf(msg, beast, node);
        if (status != zOK)
        {
            CACHE_RELEASE(buf);
            return zFAILURE;
        }
        PERIODIC_YIELD();
        poolBlk = node->head.leafLink;
        if ((poolBlk == INVALID_BLK_ZERO)
         || (node->extent[node->head.numRecs-1].count >= stInfo->nextBlk))
        {
            /* Last leaf, or this leaf already reaches the end of the map. */
            CACHE_RELEASE(buf);
            return zOK;
        }
        CACHE_RELEASE(buf);
        READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_READ);
        SET_DEBUG_ID(iomsg, 40);
        buf = ZFS_ReadPoolBlk( &genMsg, &iomsg);
        if (buf == NULL)
        {
            SetStatusFromErrno(msg, &genMsg);
            return zFAILURE;
        }
    }
}
/***************************************************************************
 * ZFSVOL_VOL_getPhysicalExtent - returns a list of physical extents for
 * a file, starting at byte offset msg->body.map.offset.
 *
 * The direct map is processed first, then the file map tree.  Running out
 * of room in the caller's extent array (zERR_FINISHED_WITH_EXTENTS) is not
 * an error: the status is cleared and zOK is returned.
 *
 * Returns zFAILURE with zERR_END_OF_FILE when the offset is at or past
 * stInfo->nextBlk, or zFAILURE with whatever status the helpers left in
 * msg for any other failure.
 ***************************************************************************/
STATUS ZFSVOL_VOL_getPhysicalExtent (
    zNSSMsg_s *msg,
    RootBeast_s *beast)
{
    ZFSStorageInfo_s *stInfo = beast->storage.zfsInfo;
    Blknum_t startBlk;
    Blknum_t endBlk;

    ASSERT_MPKNSS_LOCK();
    ASSERT_SLATCH( &beast->ROOTbeastLatch);

    /*
     * Seed the return values; pool2physicalExtent updates them as extents
     * are produced.
     */
    msg->body.map.retEndingOffset = msg->body.map.offset;
    msg->body.map.retExtentListCount = 0;

    endBlk = stInfo->nextBlk;
    startBlk = msg->body.map.offset >> beast->blkSizeShift;
    if (startBlk >= endBlk)
    {
        SetStatus(msg, zERR_END_OF_FILE);
        return zFAILURE;
    }

    /* The tree pass only runs when the direct pass succeeded. */
    if ((ZFSVOL_doDirectPhysicalExtents(msg, beast) == zOK) &&
        (ZFSVOL_doTreePhysicalExtents(msg, beast) == zOK))
    {
        return zOK;
    }

    if (GetStatus(msg) != zERR_FINISHED_WITH_EXTENTS)
    {
        return zFAILURE;
    }

    /* A full output array just ends the list early. */
    ClearStatus(msg);
    return zOK;
}
/*****************************************************************************
 * ZFS_fileBlk2volBlk - map a file block of beast to its pool block.
 *
 * Returns the pool block found by searchDirectMap (when fileBlk lies in
 * the direct map) or by searchLeaf (after descending the file map tree).
 * Returns 0 when fileBlk is at or past stInfo->nextBlk, when there is no
 * tree to consult, when a block read fails, or when a node with an
 * unexpected magic is met; a hole is also reported as 0.
 *
 * On input *length is the number of logical blocks the caller wants.  On
 * output it is lowered to the run length reported by the search routine
 * when that is smaller; the run length stays 0 on the paths that never
 * reach a search routine.
 *****************************************************************************/
Blknum_t ZFS_fileBlk2volBlk (
    RootBeast_s *beast,
    Blknum_t fileBlk,
    NINT *length)
{
    GeneralMsg_s genMsg;
    Buffer_s *buf;
    ZFSStorageInfo_s *stInfo;
    Fmap_s *fmap;
    FmapNode_s *node;
    IoMsg_s iomsg;
    Blknum_t result = 0;
    Blknum_t readBlk;
    Blknum_t retLen = 0;
    Blknum_t seed = 0;
    NINT index = 0;
    NINT depth = 0;

    ASSERT_MPKNSS_LOCK();
    ASSERT_LATCH( &beast->ROOTbeastLatch);
    COMN_SETUP_GENERAL_MSG_NOSA(&genMsg);

    stInfo = beast->storage.zfsInfo;
    fmap = &stInfo->fmap;

    if (fileBlk >= stInfo->nextBlk)
    {
        /* Past the last mapped block: result stays 0. */
    }
    else if (fileBlk < fmap->dirExt[fmap->numRecs - 1].count)
    {
        result = searchDirectMap(fmap, fileBlk, &seed, &retLen, &index);
    }
    else if (fmap->root)
    {
        /*
         * One loop serves the root and every node below it; only the
         * accepted magic differs (depth 0 is the root).
         */
        readBlk = fmap->root;
        for (;;)
        {
            READBLK_IO_MSG(iomsg, beast, readBlk, CACHE_READ);
            SET_DEBUG_ID(iomsg, 0);
            buf = ZFS_ReadPoolBlk(&genMsg, &iomsg);
            if (buf == NULL)
            {
                break;
            }
            node = (FmapNode_s *)buf->pBuf.data;

            if ((depth == 0)
                ? (node->head.magic != FMAP_BT_ROOT)
                : ((node->head.magic != FMAP_BT_LEAF) &&
                   (node->head.magic != FMAP_BT_BRANCH)))
            {
                SetErrno(&genMsg, zERR_MEDIA_CORRUPTED);
                ZLSSPOOL_MediaIsCorrupt(&genMsg, buf, &iomsg);
                CACHE_RELEASE(buf);
                buf = NULL;
                break;
            }

            if (node->head.state & BT_LEAF)
            {
                result = searchLeaf(node, fileBlk, &retLen, &index);
                CACHE_RELEASE(buf);
                buf = NULL;
                break;
            }

            /* Branch node: choose the child before releasing the buffer. */
            readBlk = searchBranch(node, fileBlk, &index);
            CACHE_RELEASE(buf);
            buf = NULL;
            depth++;
        }
    }

    /* Never report more blocks than the located run provides. */
    if (retLen < *length)
    {
        *length = retLen;
    }
    return result;
}
/**************************************************************************
 * ZFSVOL_VOL_isBlockInBeast - report whether file block "fileBlk" is
 * physically allocated in the file.
 *
 * Returns TRUE when the block starts before beast->ROOTeof and
 * ZFS_fileBlk2volBlk maps it to a non-zero pool block.  Returns FALSE when
 * the block is in a SPARSE hole or beyond the end of the file.
 *
 * The byte offset is computed in QUAD width, as the other byte-offset
 * computations in this file do, so that the shift cannot overflow the
 * width of Blknum_t before the value is compared with ROOTeof.
 ***************************************************************************/
BOOL ZFSVOL_VOL_isBlockInBeast(
    RootBeast_s *beast,
    Blknum_t fileBlk)
{
    NINT len = 1;
    QUAD byteOffset;

    ASSERT_MPKNSS_LOCK();

    /* Widen before shifting; block numbers may be narrower than offsets. */
    byteOffset = ((QUAD)fileBlk) << beast->ROOTmycache.bufSizeShift;
    if (byteOffset < beast->ROOTeof)
    {
        return (ZFS_fileBlk2volBlk(beast, fileBlk, &len) != 0);
    }
    else
    {
        return FALSE;
    }
}
/***************************************************************************
* Read Ahead Pool Blocks.
* Set up the IoMsg using READBLK_IO_MSG before calling this routine
* (This macro will set the fileBlk to be the negative of the specified
* volBlk, as well as set the beast)
* Make sure that the beast is not X_LATCHED when ZFS_ReadAheadPoolBlk
* is called. It can be share latched.
***************************************************************************/
/*
 * asyncReadAheadPoolBlkDone - completion handler handed to asyncReadVolBlk
 * by asyncContinueReadAheadPoolBlk.
 *
 * Releases the buffer attached to the request (if any), unlatches and
 * releases the beast that owns the request's cache (SLATCHED), and frees
 * the request itself.
 */
void asyncReadAheadPoolBlkDone(Fsm_s *fsm)
{
    Asyncio_s *request = STRUCT(fsm,Asyncio_s,fsm);
    RootBeast_s *beast = STRUCT(request->mycache, RootBeast_s, ROOTmycache);

    ASSERT_MPKNSS_LOCK();

    if (request->buffer)
    {
        CACHE_RELEASE(request->buffer);
    }
    COMN_UnlatchAndRelease(&beast, SLATCHED);
    freeAsyncio(request);
}
/*
 * asyncContinueReadAheadPoolBlk - continuation registered with FSM_S_LATCH
 * by ZFS_ReadAheadPoolBlk (presumably run once the shared beast latch has
 * been granted - TODO confirm).
 *
 * Starts the read of the request's pool block; completion is reported to
 * asyncReadAheadPoolBlkDone.
 */
void asyncContinueReadAheadPoolBlk(FsmLite_s *fsmLite)
{
    Asyncio_s *request = STRUCT(fsmLite,Asyncio_s,fsm.lite);

    ASSERT_MPKNSS_LOCK();

    asyncReadVolBlk(request, asyncReadAheadPoolBlkDone);
}
/**
 ** ZFS_ReadAheadPoolBlk - queue a read-ahead of one pool block.
 **
 ** Initialize the IoMsg_s using READBLK_IO_MSG before calling this routine.
 **
 ** Takes a use on the beast, builds an async request from the message
 ** (cache, file block, mode, pool block) and registers
 ** asyncContinueReadAheadPoolBlk with FSM_S_LATCH on the beast latch.
 **/
void ZFS_ReadAheadPoolBlk(
    IoMsg_s *ioMsg)
{
    RootBeast_s *beast = ioMsg->beast;
    Asyncio_s *request;

    ASSERT_MPKNSS_LOCK();

    COMN_USE_BEAST(beast);
    request = getAsyncio();
    INIT_AIO(request, &beast->ROOTmycache, ioMsg->fileBlk, ioMsg->mode);
    request->volBlk = ioMsg->volBlk;
    FSM_S_LATCH(&beast->ROOTbeastLatch, &request->fsm.lite,
                asyncContinueReadAheadPoolBlk);
}
/*- (FUNCTION) ----- ZFS_appendFileMap() ---------------------------------------
|
| do not allow this api to be exposed to the public. this api is used
| internal to zlss only and does not have logging, transactions, latching,
| etc.
| IPU uses this to fill in the file map before the NSS volume is real
| and before any external access.
|
+-------------------------------------------------------------------------*/
/*
 * ipuUpdateSparseLeaf - append a hole followed by an allocated extent to
 * the file map leaf held in buf.
 *
 * genMsg     - receives the errno when a block allocation fails.
 * beast      - file whose map is being extended.
 * extent     - pool extent that follows the hole.
 * fileBlk    - file block at which the extent starts (end of the hole).
 * holeBlkCnt - size of the hole in blocks; passed to the split routine.
 * buf        - leaf to extend.
 * bufParent  - parent of the leaf, or NULL when the leaf is the root.
 *
 * When the leaf has room, two records are appended: a hole record ending
 * at fileBlk (pool block 0) and the extent record.  Otherwise a sibling is
 * allocated (plus a new root when the leaf is the root) and the leaf is
 * split with splitBtreeLeafSparse.
 *
 * Buffer ownership: buf and bufParent are consumed; they are released on
 * every return path, including failures, because the caller does not
 * release them after this call.
 *
 * Returns zOK, or zFAILURE when a block allocation fails.
 */
STATUS ipuUpdateSparseLeaf(
    GeneralMsg_s *genMsg,
    RootBeast_s *beast,
    Extent_s *extent,
    Blknum_t fileBlk,
    Blknum_t holeBlkCnt,
    Buffer_s *buf,
    Buffer_s *bufParent)
{
    ZFSStorageInfo_s *stInfo;
    Fmap_s *fmap;
    FmapNode_s *node;
    Buffer_s *bufSibling = NULL;
    IoMsg_s iomsg;
    NINT index = 0, pIndex = 0;

    stInfo = beast->storage.zfsInfo;
    fmap = &stInfo->fmap;
    node = (FmapNode_s *)buf->pBuf.data;
    if (node->head.numRecs < (FMAP_MAX - 6))
    {
        /* Room in the leaf: hole record first, then the extent record. */
        node->extent[node->head.numRecs].count = fileBlk;
        node->extent[node->head.numRecs].poolBlk = 0;
        node->head.numRecs++;
        node->extent[node->head.numRecs].count =
                                    fileBlk + extent->lengthOfExtent;
        node->extent[node->head.numRecs].poolBlk = extent->poolBlkNum;
        node->head.numRecs++;
        if (bufParent)
        {
            CACHE_RELEASE(bufParent);
        }
        CACHE_DIRTY_RELEASE(buf);
    }
    else
    {
        XALLOC_SEED_IO_MSG(iomsg, beast, NULL, 0, CACHE_UPDATE);
        if ((bufSibling = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
        {
            CACHE_RELEASE(buf);
            /* The parent is owned by this routine; do not leak it. */
            if (bufParent)
            {
                CACHE_RELEASE(bufParent);
            }
            return zFAILURE;
        }
        if (node->head.state & BT_ROOT)
        {
            /* Splitting the root: a new root is needed above it. */
            zASSERT(bufParent == NULL);
            XALLOC_SEED_IO_MSG(iomsg, beast, NULL, 0, CACHE_UPDATE);
            if ((bufParent = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
            {
                Extent_s localExt;

                /* Give the unused sibling block back to the pool. */
                CACHE_RELEASE(buf);
                localExt.poolBlkNum = bufSibling->volBlk;
                localExt.lengthOfExtent = 1;
                zfsFreeExtent(genMsg, beast->vol.zfsVol,
                                &localExt, NULL);
                cacheReleaseToss(bufSibling);
                return zFAILURE;
            }
            stInfo->fmapTreeBlks++;
            syncGrowBtree(NULL, buf, bufParent, &pIndex, beast);
            fmap->root = bufParent->volBlk;
        }
        zASSERT(bufParent != NULL);
        /* Account for the sibling block. */
        stInfo->fmapTreeBlks++;
        splitBtreeLeafSparse(NULL, buf, bufSibling, bufParent, index,
                            pIndex, extent, (NINT)holeBlkCnt, fileBlk);
        CACHE_DIRTY_RELEASE(buf);
        buf = NULL;
        CACHE_DIRTY_RELEASE(bufParent);
        bufParent = NULL;
        CACHE_DIRTY_RELEASE(bufSibling);
        bufSibling = NULL;
    }
    return zOK;
}
/*
 * ipuUpdateSparse - append a hole of holeBlkCnt blocks followed by extent
 * to the end of the file map tree of beast.
 *
 * genMsg     - receives the errno when a read or allocation fails.
 * beast      - file whose map is being extended.
 * extent     - pool extent that follows the hole.
 * fileBlk    - file block at which the extent starts.
 * holeBlkCnt - size of the hole in blocks.
 *
 * If the file has no tree yet, a root leaf is created whose first record
 * copies the last direct-map record.  Otherwise the tree is descended
 * along the last record of each branch; full branches are split on the
 * way down.  The leaf update itself is done by ipuUpdateSparseLeaf, which
 * takes over the leaf and parent buffers.
 *
 * Returns zOK, or zFAILURE when a read or allocation fails.  No buffer
 * obtained here is left held on a failure return.
 */
STATUS ipuUpdateSparse(
    GeneralMsg_s *genMsg,
    RootBeast_s *beast,
    Extent_s *extent,
    Blknum_t fileBlk,
    Blknum_t holeBlkCnt)
{
    ZFSStorageInfo_s *stInfo;
    Fmap_s *fmap;
    Buffer_s *buf = NULL;
    Buffer_s *bufParent = NULL;
    Buffer_s *bufSibling = NULL;
    FmapNode_s *node;
    IoMsg_s iomsg;
    Blknum_t poolBlk;
    NINT tmp;

    stInfo = beast->storage.zfsInfo;
    fmap = &stInfo->fmap;
    if (fmap->root == INVALID_BLK_ZERO)
    {
        /* Making file sparse the first time, filemap is still direct,
         * continuing entries will be in the btree */
        XALLOCBLK_IO_MSG(iomsg, beast, NULL, CACHE_UPDATE);
        if ((buf = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
        {
            return zFAILURE;
        }
        stInfo->fmapTreeBlks++;
        node = (FmapNode_s *)buf->pBuf.data;
        fmap->root = buf->volBlk;
        node->head.magic = FMAP_BT_ROOT;
        node->head.fnh_internalID = beast->ROOTinternalID;
        node->head.fnh_zid = beast->zid;
        node->head.state = BT_ROOT | BT_LEAF;
        node->head.leafLink = INVALID_BLK_ZERO;
        node->head.lsn = 0;
        /* Record 0 repeats the last direct-map record as the boundary. */
        node->extent[0].count = fmap->dirExt[fmap->numRecs -1].count;
        node->extent[0].poolBlk = fmap->dirExt[fmap->numRecs -1].poolBlk;
        node->head.numRecs = 1;
    }
    else
    {
        READBLK_IO_MSG(iomsg, beast, fmap->root, CACHE_UPDATE);
        if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
        {
            return zFAILURE;
        }
        node = (FmapNode_s *)buf->pBuf.data;
        zASSERT(node->head.magic == FMAP_BT_ROOT);
    }
    while (!(node->head.state & BT_LEAF))
    {
        /* Appending: always follow the last record of the branch. */
        poolBlk = node->extent[node->head.numRecs-1].poolBlk;
        if (node->head.numRecs < (FMAP_MAX - 6))
        {
            /* Branch has room: it becomes the parent for the next level. */
            if (bufParent)
            {
                CACHE_RELEASE(bufParent);
            }
            bufParent = buf;
        }
        else
        {
            XALLOC_SEED_IO_MSG(iomsg, beast, NULL, 0, CACHE_UPDATE);
            if ((bufSibling = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
            {
                CACHE_RELEASE(buf);
                /* A parent held from the level above must not be leaked. */
                if (bufParent)
                {
                    CACHE_RELEASE(bufParent);
                }
                return zFAILURE;
            }
            if (node->head.state & BT_ROOT)
            {
                XALLOC_SEED_IO_MSG(iomsg, beast, NULL, 0, CACHE_UPDATE);
                if ((bufParent = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
                {
                    Extent_s localExt;

                    /* Give the unused sibling block back to the pool. */
                    CACHE_RELEASE(buf);
                    localExt.poolBlkNum = bufSibling->volBlk;
                    localExt.lengthOfExtent = 1;
                    zfsFreeExtent(genMsg, beast->vol.zfsVol,
                                    &localExt, NULL);
                    cacheReleaseToss(bufSibling);
                    return zFAILURE;
                }
                stInfo->fmapTreeBlks++;
                syncGrowBtree(NULL, buf, bufParent, &tmp, beast);
                fmap->root = bufParent->volBlk;
            }
            zASSERT(bufParent != NULL);
            stInfo->fmapTreeBlks++;
            syncSplitBtree(NULL, buf, bufSibling, bufParent, extent);
            CACHE_DIRTY_RELEASE(buf);
            CACHE_DIRTY_RELEASE(bufParent);
            bufParent = NULL;
            /* The new sibling is the parent for the next level down. */
            bufParent = bufSibling;
            bufSibling = NULL;
        }
        buf = NULL;
        READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_UPDATE);
        if ((buf = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
        {
            if (bufParent)
            {
                CACHE_RELEASE(bufParent);
            }
            return zFAILURE;
        }
        node = (FmapNode_s *)buf->pBuf.data;
        zASSERT((node->head.magic == FMAP_BT_LEAF) ||
                (node->head.magic == FMAP_BT_BRANCH));
    }
    /* The leaf routine releases buf and bufParent on every path. */
    if (ipuUpdateSparseLeaf(genMsg, beast, extent, fileBlk,
                            holeBlkCnt, buf, bufParent) != zOK)
    {
        return zFAILURE;
    }
    return zOK;
}
/*
 * ipuUpdateBtreeFileMap - append extent to the end of the file map tree of
 * beast (no hole in front of it).
 *
 * genMsg - receives the errno when a read or allocation fails.
 * beast  - file whose map is being extended.
 * extent - pool extent to append.
 *
 * The tree is descended from fmap->root along the last record of each
 * branch; full branches are split on the way down.  At the leaf the extent
 * is merged into the last record when it is contiguous with it in the
 * pool, appended as a new record when the leaf has room, and otherwise the
 * leaf is split (growing a new root first when the leaf is the root).
 *
 * Returns zOK, or zFAILURE when a read or allocation fails.  No buffer
 * obtained here is left held on a failure return; on the failure paths a
 * held parent is released with CACHE_RELEASE, as the read-failure path
 * does.
 */
STATUS ipuUpdateBtreeFileMap (
    GeneralMsg_s *genMsg,
    RootBeast_s *beast,
    Extent_s *extent)
{
    ZFSStorageInfo_s *stInfo;
    Buffer_s *bufChild = NULL;
    Buffer_s *bufParent = NULL;
    Buffer_s *bufSibling = NULL;
    FmapNode_s *child;
    Fmap_s *fmap;
    Blknum_t poolBlk;
    IoMsg_s iomsg;
    NINT tmp;

    stInfo = beast->storage.zfsInfo;
    fmap = &stInfo->fmap;
    poolBlk = fmap->root;
ContinueScanningTheBtree:
    READBLK_IO_MSG(iomsg, beast, poolBlk, CACHE_UPDATE);
    if ((bufChild = ZFS_ReadPoolBlk(genMsg, &iomsg)) == NULL)
    {
        if (bufParent)
        {
            CACHE_RELEASE(bufParent);
        }
        if (bufSibling)
        {
            CACHE_RELEASE(bufSibling);
        }
        return zFAILURE;
    }
    child = (FmapNode_s *)(bufChild->pBuf.data);
    zASSERT( (child->head.magic == FMAP_BT_LEAF) ||
             (child->head.magic == FMAP_BT_ROOT) ||
             (child->head.magic == FMAP_BT_BRANCH) );
    if (child->head.state & BT_LEAF)
    {
        /* Contiguous with the last record in the pool: just lengthen it. */
        if ((child->extent[child->head.numRecs -1].poolBlk +
            child->extent[child->head.numRecs -1].count -
            child->extent[child->head.numRecs -2].count) == extent->poolBlkNum)
        {
            child->extent[child->head.numRecs -1].count+=extent->lengthOfExtent;
        }
        else if (child->head.numRecs < (FMAP_MAX - 6))
        {
            /* Room in the leaf: add a new last record. */
            child->extent[child->head.numRecs].poolBlk = extent->poolBlkNum;
            child->extent[child->head.numRecs].count =
             child->extent[child->head.numRecs -1].count+extent->lengthOfExtent;
            child->head.numRecs++;
        }
        else
        {
            /* Leaf is full: split it. */
            XALLOC_SEED_IO_MSG(iomsg, beast, NULL, 0, CACHE_UPDATE);
            if ((bufSibling = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
            {
                CACHE_RELEASE(bufChild);
                /* A parent held from the level above must not be leaked. */
                if (bufParent)
                {
                    CACHE_RELEASE(bufParent);
                }
                return zFAILURE;
            }
            if (child->head.state & BT_ROOT)
            {
                XALLOC_SEED_IO_MSG(iomsg, beast, NULL, 0, CACHE_UPDATE);
                if ((bufParent = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
                {
                    Extent_s localExt;

                    /* Give the unused sibling block back to the pool. */
                    CACHE_RELEASE(bufChild);
                    localExt.poolBlkNum = bufSibling->volBlk;
                    localExt.lengthOfExtent = 1;
                    zfsFreeExtent(genMsg, beast->vol.zfsVol,
                                    &localExt, NULL);
                    cacheReleaseToss(bufSibling);
                    return zFAILURE;
                }
                stInfo->fmapTreeBlks++;
                syncGrowBtree(NULL, bufChild, bufParent, &tmp, beast);
                fmap->root = bufParent->volBlk;
            }
            zASSERT(bufParent != NULL);
            stInfo->fmapTreeBlks++;
            syncSplitBtree(NULL, bufChild, bufSibling, bufParent, extent);
        }
    }
    else
    {
        /* Appending: always follow the last record of the branch. */
        poolBlk = child->extent[child->head.numRecs-1].poolBlk;
        if (child->head.numRecs < (FMAP_MAX - 6))
        {
            /* Branch has room: it becomes the parent for the next level. */
            if (bufParent)
            {
                CACHE_DIRTY_RELEASE(bufParent);
                bufParent = NULL;
            }
            bufParent = bufChild;
            bufChild = NULL;
        }
        else
        {
            /* Branch is full: split it before descending. */
            XALLOC_SEED_IO_MSG(iomsg, beast, NULL, 0, CACHE_UPDATE);
            if ((bufSibling = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
            {
                CACHE_RELEASE(bufChild);
                /* A parent held from the level above must not be leaked. */
                if (bufParent)
                {
                    CACHE_RELEASE(bufParent);
                }
                return zFAILURE;
            }
            if (child->head.state & BT_ROOT)
            {
                XALLOC_SEED_IO_MSG(iomsg, beast, NULL, 0, CACHE_UPDATE);
                if ((bufParent = ZFS_AllocPoolBlk(genMsg, &iomsg)) == NULL)
                {
                    Extent_s localExt;

                    /* Give the unused sibling block back to the pool. */
                    CACHE_RELEASE(bufChild);
                    localExt.poolBlkNum = bufSibling->volBlk;
                    localExt.lengthOfExtent = 1;
                    zfsFreeExtent(genMsg, beast->vol.zfsVol,
                                    &localExt, NULL);
                    cacheReleaseToss(bufSibling);
                    return zFAILURE;
                }
                stInfo->fmapTreeBlks++;
                syncGrowBtree(NULL, bufChild, bufParent, &tmp, beast);
                fmap->root = bufParent->volBlk;
            }
            zASSERT(bufParent != NULL);
            stInfo->fmapTreeBlks++;
            syncSplitBtree(NULL, bufChild, bufSibling, bufParent, extent);
            CACHE_DIRTY_RELEASE(bufChild);
            bufChild = NULL;
            CACHE_DIRTY_RELEASE(bufParent);
            bufParent = NULL;
            /* The new sibling is the parent for the next level down. */
            bufParent = bufSibling;
            bufSibling = NULL;
        }
        goto ContinueScanningTheBtree;
    }
    /* Leaf updated: write back everything still held. */
    CACHE_DIRTY_RELEASE(bufChild);
    if (bufParent)
    {
        CACHE_DIRTY_RELEASE(bufParent);
    }
    if (bufSibling)
    {
        CACHE_DIRTY_RELEASE(bufSibling);
    }
    return zOK;
}
/*
 * ipuUpdateDirectFileMap - append extent to a file map that is still held
 * entirely in the direct (in-beast) extent array.
 *
 * Cases, tried in this order:
 *   - only record 0 exists: the extent becomes record 1;
 *   - the extent is contiguous in the pool with the last record: that
 *     record is lengthened;
 *   - the array has room (fewer than MAX_DIRECT records): a new record is
 *     added;
 *   - otherwise a root leaf is allocated whose record 0 repeats the last
 *     direct record and whose record 1 holds the new extent.
 *
 * Returns zOK, or zFAILURE when the root leaf cannot be allocated.
 */
STATUS ipuUpdateDirectFileMap (
    GeneralMsg_s *genMsg,
    RootBeast_s *beast,
    Extent_s *extent)
{
    ZFSStorageInfo_s *stInfo = beast->storage.zfsInfo;
    Fmap_s *fmap = &stInfo->fmap;
    Blknum_t newPoolBlk = extent->poolBlkNum;
    Blknum_t newLen = extent->lengthOfExtent;
    Buffer_s *rootBuf = NULL;
    FmapNode_s *leaf;
    IoMsg_s iomsg;

    if (fmap->numRecs == 1)
    {
        /* First real record of the file. */
        fmap->dirExt[1].count = newLen;
        fmap->dirExt[1].poolBlk = newPoolBlk;
        fmap->numRecs = 2;
        return zOK;
    }

    if ((fmap->dirExt[fmap->numRecs -1].poolBlk -
         fmap->dirExt[fmap->numRecs -2].count +
         fmap->dirExt[fmap->numRecs -1].count) == newPoolBlk)
    {
        /* Pool-contiguous with the last record: lengthen it. */
        fmap->dirExt[fmap->numRecs - 1].count += newLen;
        return zOK;
    }

    if (fmap->numRecs < MAX_DIRECT)
    {
        fmap->dirExt[fmap->numRecs].poolBlk = newPoolBlk;
        fmap->dirExt[fmap->numRecs].count =
                        fmap->dirExt[fmap->numRecs -1].count + newLen;
        fmap->numRecs++;
        return zOK;
    }

    /* Direct array is full: start the tree with a root that is a leaf. */
    XALLOCBLK_IO_MSG(iomsg, beast, NULL, CACHE_UPDATE);
    rootBuf = ZFS_AllocPoolBlk(genMsg, &iomsg);
    if (rootBuf == NULL)
    {
        return zFAILURE;
    }
    stInfo->fmapTreeBlks++;
    fmap->root = rootBuf->volBlk;

    leaf = (FmapNode_s *)(rootBuf->pBuf.data);
    leaf->head.magic = FMAP_BT_ROOT;
    leaf->head.fnh_internalID = beast->ROOTinternalID;
    leaf->head.fnh_zid = beast->zid;
    leaf->head.state = BT_ROOT | BT_LEAF;
    leaf->head.leafLink = 0;
    leaf->head.lsn = 0;

    /* Record 0 repeats the last direct record; record 1 is the new one. */
    leaf->extent[0].count = fmap->dirExt[fmap->numRecs - 1].count;
    leaf->extent[0].poolBlk = fmap->dirExt[fmap->numRecs -1].poolBlk;
    leaf->extent[1].count = fmap->dirExt[fmap->numRecs - 1].count + newLen;
    leaf->extent[1].poolBlk = newPoolBlk;
    leaf->head.numRecs = 2;

    CACHE_DIRTY_RELEASE(rootBuf);
    return zOK;
}
/*****************************************************************************
** This routine is for IPU to call to add specific extents to the end of
** the file map for the specified beast.
**
** This routine does NOT: 1) check for latching
** 2) do transactions
** 3) do logging
**
** The beast is marked dirty when we are done. It does not change the
** logical eof.
**
** The only error cases that exist in this routine is if there is an error
** allocating and getting a buffer for a meta block to extend the filemap,
** or if there is an error reading an existing filemap metadata block.
**
** This routine will also take care of extending the file in a sparse manner.
** For sparse files holeBlkCnt represents the number of 4K blocks that should
** be sparse (not allocated) before adding the extent to the end of the file
** map.
**
** For non sparse files: holeBlkCnt should be set to 0.
**
****************************************************************************/
/*
 * ZFS_appendFileMap - append holeBlkCnt unallocated blocks (may be 0)
 * followed by extent to the end of the file map of beast.
 *
 * Dispatches to ipuUpdateSparse when a hole precedes the extent, to
 * ipuUpdateDirectFileMap while the map has no tree root, and to
 * ipuUpdateBtreeFileMap otherwise.  On success fmapDataBlks and nextBlk
 * are advanced and the beast is marked dirty.
 *
 * Returns zOK, or zFAILURE when the selected helper fails (the counters
 * are then left untouched).
 */
STATUS ZFS_appendFileMap(
    GeneralMsg_s *genMsg,
    RootBeast_s *beast,
    Blknum_t holeBlkCnt,
    Extent_s *extent)
{
    ZFSStorageInfo_s *stInfo;
    Fmap_s *fmap;
    Blknum_t fileBlk;

    stInfo = beast->storage.zfsInfo;
    fmap = &stInfo->fmap;
    /* File block at which the new extent starts, i.e. after the hole. */
    fileBlk = stInfo->nextBlk + holeBlkCnt;
    if (fmap->numRecs == 0)
    {
        /** Very first time adding a blk to the file.
         ** This is keyed on numRecs alone: fileBlk already includes
         ** holeBlkCnt, so a first append that begins with a hole has a
         ** non-zero fileBlk, and the helpers index dirExt[numRecs - 1].
         **/
        fmap->numRecs = 1;
    }
    if (holeBlkCnt)
    {
        if (ipuUpdateSparse(genMsg, beast, extent, fileBlk, holeBlkCnt) != zOK)
        {
            return zFAILURE;
        }
    }
    else if (fmap->root == INVALID_BLK_ZERO)
    {
        if (ipuUpdateDirectFileMap(genMsg, beast, extent) != zOK)
        {
            return zFAILURE;
        }
    }
    else
    {
        if (ipuUpdateBtreeFileMap(genMsg, beast, extent) != zOK)
        {
            return zFAILURE;
        }
    }
    stInfo->fmapDataBlks += extent->lengthOfExtent;
    stInfo->nextBlk = fileBlk + extent->lengthOfExtent;
    BST_MarkDirty(beast);
    return zOK;
}