--- linux-3.2-rc5/fs/ext2/ChangeLog.e2compr-26port 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/ChangeLog.e2compr-26port 2011-12-13 14:22:47.822975235 +0100
@@ -0,0 +1,439 @@

e2compr - Released under the GPL v2 license.


Installation:
=============

1. gunzip:
   > gunzip linux-3.1-rc3-e2c-0.4.58.patch.gz

2. change to your kernel directory

3. make clean:
   > make clean

4. patch:
   > patch -p1 < ../path/to/patch/linux-3.1-rc3-e2c-0.4.58.patch

   see if any rejects occurred:
   > find | grep .rej

   WARNING: All rejects must be fixed manually!

5. config:
   > make oldconfig
   > make menuconfig
   Now enable at least the ext2 compression feature:
   Filesystems:
     <*> Second extended fs support
     [ ] Ext2 extended attributes
     [ ] Ext2 execute in place support
     [*] Ext2 file compression (DANGEROUS)
         Ext2 file compression options --->

6. make:
   > make


Building a patch:
=================

files.txt:

fs/ext2/ChangeLog.e2compr-26port
Documentation/filesystems/e2compress.txt
fs/ext2/Readme.e2compr
fs/Kconfig
include/linux/ext2_fs_c.h
fs/ext2/Makefile
fs/ext2/compress.c
fs/ext2/e2zlib.c
fs/ext2/adler32.c
fs/ext2/super.c
fs/ext2/ialloc.c
fs/ext2/balloc.c
fs/ext2/inode.c
fs/ext2/file.c
fs/ext2/ioctl.c
fs/ext2/ext2.h
include/linux/ext2_fs.h
fs/fcntl.c
mm/truncate.c
mm/swapfile.c
mm/filemap.c
mm/page_alloc.c


cat files.txt | xargs -n1 -I '{}' diff -pruNbB linux-3.1-rc3/'{}' linux-3.1-rc3-e2c/'{}' > ./linux-3.1-e2c-0.4.58.patch


Changelog:
==========

25 August 2011
    Matthias Winkler
    * released version 0.4.58 for kernel 3.1
    * file.c: i_alloc_sem was removed. I am not sure whether holding i_mutex
      alone will be enough. See http://patchwork.ozlabs.org/patch/101859/.
      In ext2_file_write() I replaced:

        mutex_lock(&inode->i_mutex);
      - down_read(&inode->i_alloc_sem);
      + atomic_inc(&inode->i_dio_count);

      - up_read(&inode->i_alloc_sem);
      + inode_dio_done(inode);
        mutex_unlock(&inode->i_mutex);

      The main purpose of i_dio_count is to block vmtruncate_range()
      as long as i_dio_count is greater than 0. In other words,
      all direct I/O must be completed before truncating is allowed.

    * file.c: generic_osync_inode was removed from mm - added its functionality
      to file.c as ex_generic_osync_inode()
    * file.c: changed &inode_lock to &inode->i_lock
    * ext2_warning() replaced by ext2_msg()
    * compress.c: vfs_dq_init(inode) replaced by dquot_initialize(inode)
    * compress.c: ext2_truncate(inode) replaced by
      ext2_truncate_blocks(inode, inode->i_size), which looks like
      exactly the same thing!
    * inode.c: dentry->d_lock now seems to need
      spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED) held.
    * compress.c, inode.c: added might_schedule() before wait_on_buffer()
      statements to assure we are not atomic at this point.
    * truncate.c: removed the patch from memory.c and moved it to truncate.c,
      as the surrounding kernel code also moved there. vmtruncate() was
      split into truncate_setsize() and truncate_pagecache() with kernel 3.1.


10 August 2009
    Matthias Winkler
    * released version 0.4.58
    * merged assert.h and debug.h into ext2_fs_c.h
    * merged NDEBUG into EXT2_COMPR_DEBUG
    * disabled adler checksums on "read" if EXT2_COMPR_DEBUG is not defined
    * merged none.c into compress.c
    * inserted multiple "CONFIG_EXT2_COMPRESS" defines to allow disabling
      of ext2 compression with patched sources.
    * re-inserted EXPORT_SYMBOL(__pagevec_free) to support ext2 as a module

05 August 2009
    Matthias Winkler
    * released version 0.4.57
    * ported to kernel 2.6.30:
      inode.c: after a fix in the generic ext2 ext2_get_blocks(), the bforget
      needed to be removed.
    * integrated SMP from version 0.4.56
    * per CPU: one separate read and one separate write working area
    * removed all external compression codecs
    * removed "verify compression" (never helped to find a bug anyway)
    * Lindent'ed all source and header files

01 August 2008
    Matthias Winkler
    * released version 0.4.55
    * complete code cleanup
    * changed policy to ALWAYS_LOCKING pages in do_generic_mapping_read()
      => completely removed the PG_Compr flag now!

31 July 2008
    Matthias Winkler
    * released version 0.4.54
    * fixes rare himem bug: only occurs if page > cluster in inode.c/readpage()
    * fixes rare readpage bug in mm/filemap.c/do_generic_mapping_read():
      The PG_Compr flag disallows reading a page while it is being
      de/compressed. Setting and unsetting it requires the page lock, with one
      exception: do_generic_mapping_read() in filemap.c. This is done for
      performance reasons. However, a simultaneous call of
      do_generic_mapping_read() for the SAME page might break the PG_Compr
      mimic.

      Solutions: Always lock any page before reading, OR have the second (n-th)
      call of do_generic_mapping_read() busy-wait until the first is done.
      The default is busy-waiting now; ALWAYS_LOCK is implemented as an option
      via a define.

25 June 2008
    Matthias Winkler
    * released version 0.4.53
    * fixes himem bug: unmapped block in ext2_decompress_cluster()
    * fixes bdev bug: ext2_get_block() must be called for every block,
      which otherwise caused oopses because of bdev == NULL. ext2_get_block()
      will set the correct bdev and the correct block number of the block.

      NEVER assign bdev manually, because the block number might be random then:
      "block->b_bdev = something" (DON'T!)

      ALWAYS use:
        if (!buffer_mapped(block) || (block->b_bdev == NULL))
            ext2_get_block()

      The bdev bug is closely related to file holes (empty blocks in a file).
      If compressed data is to be written to a former hole, then
      usually ext2_get_block() must be called with create set:
      ext2_get_block( , , , 1 /*create*/).

    * fixed missing include in xattr.h
    * EXT2_COMPRBLK might be removed during compression if a cluster
      doesn't compress. We now re-raise the EXT2_COMPRBLK flag
      after every cluster during compression.
    * added missing export of __pagevec_free (to mm/page_alloc.c)
    * deny O_DIRECT access mode after open of a file using fcntl()
      (in fs/fcntl.c).
    * file.c:
      Replaced ext2_file_write() to use the kernel's generic
      do_sync_write(). Writing to compressed files calls
      ext2_file_write():
      - divide the write range into clusters
      - ext2_decompress_cluster (if needed)
      - do_sync_write()
      - ext2_compress_cluster (if needed)
    * inode.c:
      ext2_writepage()/ext2_writepages() usually write back
      dirty pages of an inode. They reside in the kernel's page cache.
      These pages might e.g. be written/dirtied by an mmap()-ped file.
      generic_file_aio_write() also uses ext2_writepage() in the end.
      I don't see how ext2_writepage() would handle compressed
      files, so I re-inserted and re-wrote this part of the old 2.4 code.
      I don't know if this code (USE_WRITEPAGE) is needed at all,
      so I leave it disabled by default. Enabled, it might
      leave compressed files with a compression ratio of 100%.
      Don't use it yet!

17 April 2008
    Matthias Winkler
    * first patch for kernel 2.6.25 released

20 March 2008
    Matthias Winkler
    * version 0.4.52: EXT2_COMPRESS_WHEN_CLU didn't work. This
      feature enables compression during file write.

15 Oct 2007
    Matthias Winkler
    * First official Sourceforge release as version 0.4.51
    * TODO: figure out what is necessary to enable swap
      support for e2compr again (see mm/swapfile.c).

27 Sep 2007
    Matthias Winkler
    * The system stalled with a lot of I/O during de-compression of
      USB sticks, too. I replaced mark_buffer_dirty
      with set_buffer_dirty. This achieves that ONLY the buffers
      and not the pages are marked. Then I write back the
      buffers with ll_rw_block() at the end of
      ext2_decompress_cluster() and ext2_decompress_pages().
      This should stop flooding the system with dirty pages,
      because now every routine waits for its newly dirtied buffers.
      My system with 128MB of RAM is responding much better during
      compression/decompression now. Decompression also seems
      to be a bit faster.
      (this change is active with: #ifndef E2C_GENERIC_OSYNC)

25 Sep 2007
    Matthias Winkler
    * The system stalled with a lot of I/O during compression of
      USB sticks. It seems generic_osync_inode() should not be
      called in ext2_compress_cluster. Therefore I replaced
      it with ll_rw_block() to write the modified blocks
      directly back to disk. This also gave ~100% better
      performance for compression.

9 Sep 2007
    Matthias Winkler
    * fixed bdev bug. This bug appeared primarily when
      files contained holes. A dirty page with holes caused
      ext2_get_cluster_blocks [ext2_get_block()]
      to create ALL blocks of the page, even if there were holes!
      These allocated hole blocks weren't zeroed anywhere and
      therefore contained invalid data. I changed the
      code to never allocate these holes.

    * ext2_truncate() added again to ext2_compress_cluster for
      uncompressed clusters. Fixes filesize errors reported by
      "e2fsck -f /dev/..."

24 Aug 2007
    Matthias Winkler

    Major changes:
    * completely ported to inode->i_mutex

    * clever CONFIG_GZ_HACK to reject "uncompressable" files
      (according to their extension) early. The IOCTL in ioctl.c
      which sets compression on a file now rejects such
      extensions already.

    * a new create_empty_buffers_e2c() was necessary, because the
      "extra" pages should NOT have a valid i_mapping! Furthermore, the
      buffers needed to be initialized correctly.

    * proper block initialization (bdev bug) in:
      - create_empty_buffers_e2c()
      - ext2_get_cluster_blocks

    * in file.c copied:
      ...with one single change at ext2_mapping_read in label page_ok:
      A new page flag (page-flags.h), the so-called "PG_compr" flag, is
      checked to assure the corresponding page is not under
      compression/decompression. This was necessary because
      generic_mapping_read() doesn't lock() the page in ALL cases!!!
      Otherwise generic_mapping_read() would have to lock EVERY page
      in the whole system before returning it....

    * Fixed HiMem support: Balanced ALL kmap/kunmap calls. Unbalanced
      calls cause the system to hang in kmap_himem() after some
      time. This can be seen with magic SysRq "altgr + prtscr + W".

    * ext2_decompress_cluster() didn't mark up-to-date pages for writeback.
      I don't know how this method could EVER have worked...

    * ext2_compress_cluster() caused an ever-increasing amount of dirty pages
      (cat /proc/vmstat) which couldn't be written back by sync/umount.
      I think this was due to the ClearPageDirty at the end of
      ext2_compress_cluster().

    * introduced ext2_get_dcount() to safely determine whether a file is
      really "open" and to abort compression/decompression in such a case.

    * Removed gzip completely, along with the non-working assembler code.
      Replaced by the kernel's built-in zlib, which is pretty much the same
      code...

    * New kernel configuration interface

    * Rollback of some unnecessary "fixes"...

    TODO:

    * HiMem support:
      One might try to use kmap_atomic instead of kmap in ext2_readpage.
      kmap_atomic doesn't block and might speed up regular page reading.
      Might.

20 April 2007
    Andreas:

    * Replaced GZIP with the kernel's zlib (which is built in anyway),
      because the assembly versions of the existing compression modules
      crashed.

    * Initial HiMem support.


06 Mar 2007

    Terry Loveall

    * adapted linux-2.6.10-e2compr-0.4.45-alpha0126.diff to the 2.6.18.5 kernel

    * replaced most instances of down/up(inode->i_sem) with
      lock/unlock(inode->i_mutex). For the exception see file.c, below.

    * made various printk regularizations to uniquely identify each printk
      instance. Inserted missing KERN_DEBUG and KERN_WARNING.

    * compress.c:
      bug fix: ext2_count_blocks: init head_bh for each iteration.
      bug fix: ext2_count_blocks: set clen=ulen for uncompressable clusters.
      bug fix: ext2_compress_cluster: replacement and inlining of an
        invalidate_inode_buffers function to keep root filesystem changes
        up to date on disk (prevents having to umount the root file system
        to update).
      warning fix: ext2_compress_cluster: various variables initialized.
      ext2_compress_cluster: removed #ifdef NDEBUG
      bug fix: ext2_compress_cluster: defined maxclus, calculated and set for:
      bug fix: ext2_compress_cluster: set filesize for uncompressed clusters.
      ext2_cleanup_compressed_inode: changed the error message to indicate
        that the 'Z' flag was caused by trying to un/compress an already
        open file.
      bug fix: cp to compr dir: Truncate uncompressed files to their
        uncompressed length, i.e. force the kernel to update inode and sb.

    * file.c:
      removed the file->f_error code since f_error is no longer in the file
      struct.
      ext2_file_write: changed down/up of i_sem to down_read/up_read of
      i_alloc_sem.

    * inode.c:
      bug fix: ext2_get_block: restored changed: loop to bforget

    * ioctl.c:
      ext2_ioctl: scrubbed the 'B' flag on file uncompress.

    * match[56]86.S:
      made the code dependent on #ifdef CONFIG_REGPARM so that it compiles
      with either register-variable or stack-variable parameter passing.

28 Feb 2005

    Yabo Ding

    * Corrected page unlocking in inode.c.

19 Feb 2005

    Paul Whittaker

    * Added le32_to_cpu corrections in critical areas of compress.c.
    * Optimized function exit code in inode.c.

24 Aug 2004
    Yabo Ding

    compress.c
    * ext2_decompress_pages()
      The old code cannot reread data from disk into a changed buffer's data
      pointer in 2.6.x. So, I copy the (decompressed) memory data to a
      temporary buffer; then reread the (compressed) data from disk and copy
      it to the head; then copy the memory data back from the temporary
      buffer. It seems clumsy, but it works well.
    * ext2_compress_cluster()
      Force write to disk.

    inode.c
    * ext2_writepage()
      Deleted old code. All calls go directly to the
      block_write_full_page() function.

    * ../Kconfig
      Changed the e2compr config into a submenu config.

04 Aug 2004

    Paul Whittaker

    * compress.c: replaced mark_buffer_dirty(x,y) with mark_buffer_dirty(x).
      I'm still not at all sure that this is sufficient.

03 Aug 2004

    Paul Whittaker

    * ../../include/linux/ext2_fs_c.h: added missing prototypes for
      ext2_iLZRW3A(), ext2_wLZRW3A(), ext2_rLZRW3A().

02 Aug 2004

    Paul Whittaker

    * ../../mm/page_alloc.c: added EXPORT_SYMBOL(__pagevec_free).

    * ../../include/linux/pagemap.h, ../../mm/filemap.c: removed inline from
      __grab_cache_page() declarations, added EXPORT_SYMBOL(__grab_cache_page).

    * ../../include/linux/mm.h, ../../mm/filemap.c: removed inline from
      page_waitqueue() declarations, added EXPORT_SYMBOL(page_waitqueue).

    * bzip2/{lib_bzip_d,lib_bzip_e}.c, {gzip,lzo,lzrw3a,lzv1}/e2compr*.c:
      replaced MOD_INC_USE_COUNT and MOD_DEC_USE_COUNT with try_module_get()
      and module_put() to avoid deprecation and safety warnings.

    * lzrw3a/lzrw3a.c: added (UBYTE *) casts to avoid compiler warnings.

    * compress.c, inode.c: incorporated Yabo's changes, correcting mistakes in
      ext2_readpages() in inode.c.

    * removed printks for ext2_discard_prealloc from file.c and inode.c (not
      needed now that this problem has been resolved).

2.6.5 -> 2.6.7 updates:

    * ../../mm/filemap.c: rewrote the CONFIG_EXT2_COMPRESS hunk for 2.6.7.

    * compress.c, file.c: use mapping_mapped(), since mapping->i_mmap has
      changed and mapping->i_mmap_shared no longer exists.

    * inode.c: page->count becomes page->_count.

--- linux-3.2-rc5/Documentation/filesystems/e2compress.txt 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/Documentation/filesystems/e2compress.txt 2011-12-13 14:22:47.824975303 +0100
@@ -0,0 +1,116 @@
Transparent compression for ext2 filesystem
===========================================

What this document is.
----------------------
This document explains how e2compress has been implemented/ported
to kernel 2.4. It also gives the status of the current work. You need to have
e2compress knowledge (i.e. to know how e2compress works, from a general point
of view).

What this document is not.
--------------------------
This document is not a full explanation of how e2compress works. For that,
there are other documents, such as the fs/ext2/Readme.e2compr file for the
technical point of view; a user manual can be found at .
That site is also a place where you will find much information about
e2compress development for kernel 2.4, tools, manuals and so on.


Introduction
============

This is a first adaptation of e2compress for kernel 2.4. The work has been
done by Alcatel (Alcatel Business Systems - R&D) at Illkirch. It started
from the latest patch provided by Peter Moulder for kernel 2.2,
i.e. e2compr-0.4.39-patch-2.2.18.
It is fully compatible with the previous version.
Hereafter you will first find some explanations of the choices made during
development, and then the status of the current work from a functional point
of view.


Development
===========

As with previous patches, the most interesting things happen when reading in
ext2_readpage and when writing in ext2_writepage and ext2_file_write.
In the 2.2 kernel, compression operates on clusters of blocks. So when
reading or writing a part of a file, we first have to compute the cluster on
which the I/O occurs, then we have to get all buffers of the cluster and
uncompress the data if needed; then reading/writing happens "as for normal
files".
In 2.4 kernels, I/O goes through the page cache: i.e. when reading/writing a
part of the file, the corresponding page is obtained first, and we then get
the needed buffers, which point into the page. This means that, to keep the
same scheme as in 2.2, we have to use the notion of a cluster of pages. To
get all buffers of a cluster, we first get all pages of the cluster, then the
buffers of every page...

So, things happen as follows:

ext2_readpage
-------------
If the data corresponding to the page is in a compressed cluster, this
function performs more work: instead of reading one page, it reads the whole
"cluster of pages". In fact we have to read the whole compressed buffer
anyway. Once we have got all buffers of the cluster, uncompressed (at least a
part of) the data, and located the part of the uncompressed data which
corresponds to the requested page, it is not much more work to also read
(i.e. do some memcpy for) the other pages belonging to this cluster.
So, the first read of the first page of the cluster takes quite a bit longer,
but then every page of the cluster is up to date in the cache.
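In outline, the read path described above reduces to something like the
following (an illustrative sketch only, using the helper names this patch
provides; locking, error handling and partial-cluster cases are omitted, and
e2c_readpage_sketch is a made-up name, not the real entry point):

	/* Sketch: read one page that may live in a compressed cluster. */
	static int e2c_readpage_sketch(struct inode *inode, struct page *page)
	{
		u32 cluster = ext2_page_to_cluster(inode, page->index);

		if (!ext2_cluster_is_compressed(inode, cluster))
			/* plain data: the normal buffer-based read path */
			return block_read_full_page(page, ext2_get_block);

		/* Compressed: read and inflate the whole cluster at once;
		   as a side effect all of its pages become up to date. */
		return ext2_decompress_cluster(inode, cluster);
	}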

ext2_writepage
--------------
An overhead has been added for pages belonging to a compressed cluster.
If the cluster is still compressed on disk, we can't directly write the
page (which contains uncompressed data) into the middle of a compressed
cluster. So, we first have to uncompress the whole cluster on disk; then we
can write the new data of the dirty page(s).

ext2_file_write
---------------
This replaces `generic_file_write' when the e2compress option is activated.
It is a copy of `generic_file_write'. The main difference is that instead of
looping page by page as in `generic_file_write', we loop over clusters of
pages.
In each loop:
  * we compute the cluster to which the beginning of the data (to be written)
    belongs.
  * then, we get all pages of the cluster.
  * If the cluster is a compressed one, we read all its pages and uncompress
    it. Otherwise, we perform a `prepare_write' (as in generic_file_write).
  * We copy the data onto each page from user space,
  * Call `commit_write' on dirty pages.
  * When reaching the end of a cluster, we compress it. (As in 2.2.)
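For the kernels this patch actually targets, the same idea appears in a
simpler form (see the 25 June 2008 ChangeLog entry): decompress the affected
clusters first, then fall through to the generic write. A sketch under those
assumptions, with the patch's own helper names; error handling and the
recompression step are omitted, and the function name is hypothetical:

	/* Sketch: cluster-wise write of [*ppos, *ppos + count). */
	static ssize_t e2c_file_write_sketch(struct file *file,
					     const char __user *buf,
					     size_t count, loff_t *ppos)
	{
		struct inode *inode = file->f_mapping->host;
		u32 clu, last;

		if (count == 0)
			return 0;
		clu = ext2_offset_to_cluster(inode, *ppos);
		last = ext2_offset_to_cluster(inode, *ppos + count - 1);

		/* Make sure every affected cluster holds plain data. */
		for (; clu <= last; clu++)
			if (ext2_cluster_is_compressed(inode, clu))
				ext2_decompress_cluster(inode, clu);

		/* The copy-in then proceeds as for a normal file; full
		   clusters are recompressed afterwards. */
		return do_sync_write(file, buf, count, ppos);
	}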

Note: Another implementation could have been to keep generic_file_write and
add an overhead to `ext2_prepare_write' and `ext2_commit_write': on the first
access to a page of a compressed cluster, the whole cluster would be
uncompressed (i.e. all pages of the cluster read and uncompressed in
`ext2_prepare_write'), and when committing the last page of the cluster,
compression would occur...

ext2_open_file
--------------
In the 2.4.16 kernel, this function has been added to treat the case of files
opened for "direct IO". Direct IO is not supported on compressed files, so
opening a file this way is forbidden.

Other places in ext2
--------------------
Other changes occur as in 2.2 for managing the compression flags of files and
the specific `COMPRESSED_BLK_ADDR' address for compressed blocks.
So please refer to the existing documentation for 2.2 about this topic.

Status
======
Today (middle of December 2001), e2compress on kernel 2.4.16 has been tested
on the i386 architecture and has been used successfully by tens of people in
the department for some weeks.
It is fully functional on ix86 and fully compatible with the 2.2 version of
e2compress. It should work on other architectures, but has NOT been tested
there.
Please note the following:
  * No performance tests have been done.
  * I don't claim that the code is optimized (it probably is not, but I hope
    that "gurus" will not find it too bad).
So, I think I can say that there is no known "big" or "blocking" bug.

Some strange things have been observed in borderline cases, i.e. when memory
is overloaded.


As usual, this e2compress comes without warranty, use it at your own risk,
etc...

--- linux-3.2-rc5/fs/ext2/Readme.e2compr 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/Readme.e2compr 2011-12-13 14:22:47.825975345 +0100
@@ -0,0 +1,511 @@

	0. Introduction
	~~~~~~~~~~~~~~~

This file gives some technical information on e2compr and how it's
implemented.

More general information on e2compr can be found at
http://e2compr.sourceforge.net/.

The first couple of sections of this document are written for those
who have no interest in the source code but just want to know enough
to be able to predict and understand e2compr behaviour and its
implications.

Section 3 describes the e2compr-specific ext2 attributes for a file
(i.e. chattr things).

Section 4 describes the e2compr ioctls from the point of view of a
user-mode C programmer.

Section 5 gives more detail about the file format on disk.

Section 6 gives details on what's written where, i.e. a map of the
e2compr code in the kernel.


Authorship: section 2 is written mainly by Antoine; the remainder is
written by Peter.

Questions should be sent to the e2compr mailing list,
e2compr-misc@lists.sourceforge.net, or to the current maintainers,
bothie@users.sourceforge.net and whitpa@users.sourceforge.net.


	1. The idea
	~~~~~~~~~~~

See section `E2compr implementation' in the main e2compr texinfo
documentation for an introduction to how e2compr works. (Type
`info "(e2compr)Implementation"' at the shell prompt.) It was
originally written as part of the file you're now reading.


	2. More details
	~~~~~~~~~~~~~~~

Every compressed file stores its cluster size in the inode structure
(in the ext2 attribute flags field).
The cluster size is the most important information: knowing the
cluster size, we can convert a block number into a cluster
number, get the cluster the block belongs to, and then get the block.
The inode's flags field also keeps the algorithm that is used to compress
data written to the file.

(The algorithm that was used to compress a given
cluster is stored in the cluster head near the beginning of the
compressed data. This may differ from the current algorithm
identified in the inode, which is only used to determine which
algorithm to use at the time clusters are written.)

The algorithm id and the cluster size are stored in the i_flags field
(thus reducing the number of possible flags). We also create some new
flags: the COMPRBLK flag tells whether there is at least one compressed
cluster in the file; the ECOMPR flag indicates that an error (related
to compression) occurred while reading from or writing to this file.
If it is set, the file becomes read-only. (In previous releases, you
were denied even read access to the file unless you set the NOCOMPR
flag. There might be some benefit in returning to the old behaviour
if decompressing erroneous data can cause an OOPS, but I think it
would be better to correct the decompressors. Others may disagree,
pointing out that it costs CPU time to check for incorrect data.)

Besides the information stored in the inode, each cluster holds some
data.
Here is the cluster_head structure for e2compr-0.4:

struct ext2_cluster_head {
	__u16 magic;		/* == EXT2_COMPRESS_MAGIC_04X. */
	__u8 method;		/* compression method id. */
	__u8 holemap_nbytes;	/* length of holemap[] array */
	__u32 checksum;		/* adler32 checksum. Checksum covers all fields
				   below this one, and the compressed data. */
	__u32 ulen;		/* size of uncompressed data */
	__u32 clen;		/* size of compressed data (excluding cluster head) */
	__u8 holemap[0];	/* bitmap describing where to put holes. */
};

The `magic' field is a magic number. It is used to detect filesystem
corruption, and can also be used for data recovery purposes. (The
e2compress program for e2compr-0.3 does this.)

The `checksum' field contains an Adler-32 checksum of the fields below
it in the struct and the compressed data. Its purpose is to protect
us from buffer overruns caused by corrupted data.

The `ulen' field says how many bytes are stored in the cluster, when
uncompressed.

The `clen' field says how many bytes are held in the cluster, when
compressed.

The `method' field identifies the algorithm that was used to compress
the cluster (this id will be used to uncompress the cluster, not the
one stored in the inode, which is used only to compress a new cluster).

The variable-length `holemap' array says where to put hole blocks when
decompressing data. The `holemap_nbytes' field gives the length of
this array. Iff holemap_nbytes is zero then there are no holes (other
than at the end of the cluster, as determined by ulen versus cluster
size).

The compressed data immediately follows the holemap array (with no
padding before it).


Compressing a cluster is done in the following way: We first get every
block in the cluster and compute the bitmap. We then compress the
non-hole data, and store the compressed data back into the existing
blocks. Unused blocks are then freed.

Decompressing a cluster is done in the following way: We get the
cluster head and retrieve the bitmap. Missing blocks are allocated and
put where the bitmap says, and then the compressed data is decompressed
and stored back into the blocks.


Reading from a compressed cluster is really easy: get the blocks,
decompress them into a working area, and get the bytes we want from
the working area. Writing to a compressed cluster is done by first
decompressing the cluster, and then writing to it, as if it were a
normal file. The file is then marked so that the cluster will be
recompressed later. [pjm: Do we decompress the cluster even if it's
to be entirely written over?]

In the current version, compression really occurs only when the inode
is put (which in turn only occurs when no processes have the file
open). This may change.


	3. Ext2 file attributes
	~~~~~~~~~~~~~~~~~~~~~~~

Attribute          Lsattr  Meaning
~~~~~~~~~          ~~~~~~  ~~~~~~~
EXT2_SECRM_FL      s       Secure deletion (not yet implemented)
EXT2_UNRM_FL       u       Undelete-able. (Not yet implemented.)
EXT2_COMPR_FL      c       Future writes to this file should be compressed.
                           (Clearing this flag decompresses the file if it
                           is a regular file and there is space to do so;
                           see the e2compr FAQ for details.)
EXT2_SYNC_FL       S       Synchronous updates. (As far as I know, this is
                           not yet fully implemented.)
EXT2_IMMUTABLE_FL  i       Immutable file.
EXT2_APPEND_FL     a       Writes to file may only append.
EXT2_NODUMP_FL     d       Not a candidate for backup with dump(8).
EXT2_NOATIME_FL    A       No access time updates.
EXT2_DIRTY_FL      Z       De/compression is yet to happen.
                           Read the source for the exact meaning.
EXT2_COMPRBLK_FL   B       File contains one or more compressed clusters.
EXT2_NOCOMPR_FL    X       Access raw compressed data. This isn't really
                           supported at the moment; user-space access is
                           yet to be worked out for 0.4.
EXT2_ECOMPR_FL     E       Compression error associated with this file.
EXT2_BTREE_FL      I       B-tree indexed directory (seemingly not yet
                           implemented).
EXT2_RESERVED_FL   -       (reserved for ext2 lib)

See the chattr(1) man page for more verbose descriptions of the
non-e2compr flags.


	4. Ioctls available
	~~~~~~~~~~~~~~~~~~~

	In brief
	~~~~~~~~

Action               Ioctl                          To kernel    From kernel
~~~~~~               ~~~~~                          ~~~~~~~~~    ~~~~~~~~~~~
Get cluster bit      EXT2_IOC_GETCLUSTERBIT         Cluster num  1 or 0 (cmp,uncmp)
Recognize compressed EXT2_IOC_RECOGNIZE_COMPRESSED  Cluster num  -
Get algorithm        EXT2_IOC_GETCOMPRMETHOD        -            Id
Set algorithm        EXT2_IOC_SETCOMPRMETHOD        Id           -
Get cluster size     EXT2_IOC_GETCLUSTERSIZE        -            Cluster size
Set cluster size     EXT2_IOC_SETCLUSTERSIZE        Cluster size -
Get attributes       EXT2_IOC_GETFLAGS              -            Flags
Set attributes       EXT2_IOC_SETFLAGS              Flags        -
Get block size       FIGETBSZ                       -            Block size

#include <linux/ext2_fs.h> to use any of these ioctls, except FIGETBSZ,
which requires <linux/fs.h>.

To find out what errors can be returned by these ioctls, read
fs/ext2/ioctl.c (for all of the above ioctls except FIGETBSZ) or
fs/ioctl.c (for FIGETBSZ).


	Setting or testing a cluster bit
	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[Note: user-space access to compression details is yet to be worked out,
so this section may not be accurate.]

EXT2_IOC_GETCLUSTERBIT sets *arg to 1 if the specified cluster (0 for the
first cluster, 1 for the second, etc.) is stored in compressed form.

To make the kernel consider a certain cluster to be compressed (after
you've done the compression yourself, in user space), use
EXT2_IOC_RECOGNIZE_COMPRESSED. This ioctl checks the validity of the
cluster's data, then marks it as compressed (if valid). This ioctl
requires special privileges, because if the compressed data is not
valid then it may be possible to crash the system (due to buffer
overruns).


	Setting or getting the compression algorithm
	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

EXT2_IOC_SETCOMPRMETHOD sets the default compression method (stored in
the inode). This is the compression method that is used for future
writes. In the current version of e2compr [accurate at 0.4.36], this
does not cause a change to how
existing clusters are stored, except when the compression method
changes from `none' to something else, in which case the kernel
attempts to compress ,all currently-uncompressed clusters` using the
new algorithm. It is an error to use this ioctl on a file without the
compressed attribute.

EXT2_IOC_GETCOMPRMETHOD sets *arg to the current compression method.

In either case, Id is one of: EXT2_DEFER_METH, EXT2_LZV1_METH,
EXT2_AUTO_METH, EXT2_NEVER_METH, EXT2_BZIP2_METH, EXT2_LZO1X_1_METH,
EXT2_LZRW3A_METH (deprecated), EXT2_GZIP1_METH, EXT2_GZIP2_METH, ...,
EXT2_GZIP9_METH.


	Setting or getting the cluster size
	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

EXT2_IOC_SETCLUSTERSIZE sets the cluster size to the value of *arg.
This ioctl fails if there are already compressed clusters in the file
(as determined by checking the EXT2_COMPRBLK_FL attribute).

EXT2_IOC_GETCLUSTERSIZE sets *arg to the current cluster size.
Surprisingly, this ioctl succeeds even if the EXT2_COMPR_FL attribute
is clear. (Maybe this will change in future, since the result is
meaningless.)

In either case, the size is one of {4, 8, 16, 32}, and represents the
number of blocks per cluster. To convert to or from a number of
bytes, use the FIGETBSZ ioctl.
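For illustration, a minimal user-space sketch of the two cluster-size
ioctls (assuming the EXT2_IOC_* definitions above are visible; the file
name is an arbitrary example):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>

	int main(void)
	{
		long size = 0;
		int fd = open("somefile", O_RDONLY);	/* arbitrary test file */

		if (fd < 0)
			return 1;
		if (ioctl(fd, EXT2_IOC_GETCLUSTERSIZE, &size) == 0)
			printf("cluster size: %ld blocks\n", size);

		size = 32;			/* one of 4, 8, 16, 32 */
		if (ioctl(fd, EXT2_IOC_SETCLUSTERSIZE, &size) != 0)
			perror("SETCLUSTERSIZE");	/* fails e.g. if the file
							   already has compressed
							   clusters */
		close(fd);
		return 0;
	}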

	Setting or getting the ext2 file attributes
	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

These ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) are not
e2compr-specific, but some attributes are e2compr-specific.

*arg consists of the set of attributes for that file OR'ed together.
E.g. a value of (EXT2_COMPR_FL | EXT2_COMPRBLK_FL | EXT2_NODUMP_FL)
for a regular file means that the file contains one or more compressed
clusters, and should not be backed up when using dump(8).

See section 3 for a description of the various attributes.

Note that although the compression method and cluster size are
physically stored in the flags field on disk, this information is
masked out (i.e. set to zero) for GETFLAGS if the kernel has e2compr
compiled in. If the kernel does not have e2compr compiled in, then this
information is not masked out. See section 5 for how the cluster size
and compression method are stored, if you wish to work with ,kernels
without e2compr`.


	Getting the block size
	~~~~~~~~~~~~~~~~~~~~~~

This ioctl (FIGETBSZ) is not e2compr-specific, but is useful for
interpreting a cluster size (which is specified as a number of blocks
rather than bytes or kilobytes).

*arg is set to the block size (in bytes) of the file. For ext2 files,
this is one of {1024, 2048, 4096}. It is the same value for all files
on the same filesystem.

You must #include <linux/fs.h> to use this ioctl (unlike the rest of
the ioctls listed here, which require <linux/ext2_fs.h>).


	5. File format
	~~~~~~~~~~~~~~

A note on byte ordering. All current versions of the kernel and
e2compr write to disk in little-endian format, so the 16-bit number
`0x8EC7' would be written as a 0xC7 byte followed by a 0x8E byte.
Unless you want to know the most general rule for byte ordering, you
can skip to the `Inode' heading.

In kernel 2.0, the ext2 fs is written to disk in the native byte
ordering. On x86 machines, this means little-endian; most other
architectures are big-endian (so the same 16-bit number would be
written as an 0x8E byte followed by 0xC7).

On kernel 2.1 and later, the ext2 fs (including e2compr data) is
written in little-endian order regardless of the host architecture.


	5.1. Inode
	~~~~~~~~~~

fs/ext2/inode.c controls the reading and writing of inode information
to/from disk; consult this file (functions ext2_read_inode(),
ext2_update_inode() and/or ext2_write_inode()) for any detail omitted
from this section.

The physical structure of an inode is struct ext2_inode (defined in
include/linux/ext2_fs.h).


The i_flags member contains the ext2 file attributes, as well as the
cluster size and compression method.

The normal flags are stored in the low 23 bits. Only the low 12 bits
are defined at present, including 4 flags introduced by the e2compr
patch. See ext2_fs.h for the flag meanings (search for
EXT2_SECRM_FL).

Bits 23 through 25 hold the cluster size, or more precisely the log2 of
the number of filesystem blocks per cluster (excluding the first cluster;
see ext2_first_cluster_nblocks in include/linux/ext2_fs_c.h).

Bits 26 through 30 store the compression method. See the definitions
for EXT2_LZV1_METH etc. in ext2_fs_c.h for the interpretation.

Bit 31 is reserved for the ext2 lib (which means that programs like
e2fsck store things there during their operation, but it isn't used by
the kernel).
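To make the layout concrete, the three groups can be pulled out of a raw
i_flags word as follows (an illustrative fragment only; the masks simply
restate the bit positions given above, and the function name is made up):

	/* Illustrative decoding of the on-disk i_flags layout. */
	static void decode_i_flags(__u32 i_flags)
	{
		unsigned attrs   = i_flags & ((1u << 23) - 1);	/* bits 0-22  */
		unsigned log2_cb = (i_flags >> 23) & 0x7;	/* bits 23-25 */
		unsigned method  = (i_flags >> 26) & 0x1f;	/* bits 26-30 */

		printk(KERN_DEBUG "attrs %#x, %u blocks/cluster, method id %u\n",
		       attrs, 1u << log2_cb, method);
	}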

	Data blocks
	~~~~~~~~~~~

Uncompressed clusters are stored just as they would be without
e2compr. So if there are no compressed clusters then the file
is stored identically to any other file.


If a cluster is compressed, then the first non-hole block starts with
a `cluster head', as defined in struct ext2_cluster_head in ext2_fs_c.h
(see section 2).

The magic number (i.e. the value of the `magic' field) is 0x9ec7
(EXT2_COMPRESS_MAGIC_04X; the pre-0.4 head used 0x8ec7). `method'
holds one of EXT2_LZV1_METH and the like. The `holemap' array describes
where the uncompressed data goes. (Recall that when we compress a
cluster, we only compress the data from non-hole blocks, so we need to
know where the holes and non-holes go when we decompress the data.) In
the pre-0.4 `ubitmap' field, a `0' bit meant a hole and a `1' bit meant
a data block; bit 0 referred to the first block, bit 1 the second, and
so on.


The block positions within the file where the compressed data is held
are a subset of where the uncompressed data would be held. Further, if
the uncompressed data occupies u non-hole blocks and this compresses to
c blocks, then the compressed data occupies the first c non-hole blocks
of the file (and the remainder are freed).

[This paragraph is an expansion of the preceding: if you understood
the preceding paragraph then skip this one.] Consider an array
cblock[] where cblock[0] holds the block number on disk (or 0 to
represent a hole) of the first block of a certain cluster of a file,
cblock[1] the second, and so on. (If you are familiar with the bmap
array or the format of first-level indirect blocks, then cblock[] is a
section of that array.) Suppose that the cluster size of this file is
16 blocks. Suppose too that, when uncompressed, blocks 0, 1, 5 and 6
of the cluster are holes but the other 12 blocks (2,3,4,7,8,...,15)
contain data. (Thus the bitmap is 0x0000ff9c.) Now if we compress this
cluster to just 5 blocks, then cblock[0], [1], [5] and [6] will continue
to be holes, ,the positions of the compressed data blocks` are stored in
cblock[2], cblock[3], [4], [7] and [8], the blocks referenced by
cblock[9] through cblock[15] are freed, and cblock[9] through cblock[15]
are set to zero.
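The allocation rule in the preceding paragraphs amounts to only a few
lines (purely illustrative, using the worked example's convention that a
`1' bit marks a data block; compare ext2_calc_free_ix() and
ext2_unpack_blkaddrs() declared in ext2_fs_c.h):

	/* bitmap = 0x0000ff9c, clu_nblocks = 16, c = 5 as in the example. */
	unsigned i, kept = 0;

	for (i = 0; i < clu_nblocks; i++) {
		if (!(bitmap & (1u << i)))
			continue;	/* hole: stays a hole */
		if (kept++ >= c)
			cblock[i] = 0;	/* surplus block freed, entry zeroed */
		/* else cblock[i] keeps the next compressed block */
	}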

	6. What's coded where
	~~~~~~~~~~~~~~~~~~~~~

File names in this section are relative to linux/fs/ext2, except for
ext2_fs.h which is in linux/include/linux.

Most of the action happens in compress.c; though note that a few
small, commonly-used routines are written as inline functions in
ext2_fs.h.

ext2_readpage() and ext2_mmap() are in file.c. ext2_file_write() is
also there.

Routines to read/write the inode from/to disk are in inode.c.

super.c contains some e2compr initialisation code (such as allocating
the e2compr work area).

All ioctl handling is in ioctl.c.

acl.c is where we deny open() access in a couple of situations (if
EXT2_NOCOMPR_FL is set and another process has the file open; and we
deny write access to a file with EXT2_ECOMPR_FL set).

ialloc.c contains code in ext2_new_inode() for newly-created files to
inherit compression attributes from the directory in which they're
created.

truncate.c handles truncation, i.e. zeroing any part of the cluster
bitmap that's been truncated, and decompressing the final cluster (but
marking it dirty so that we try to recompress it on file close) if the
new size is part-way through a compressed cluster, so that zeroing
over the truncated data works.

linux/include/linux/ext2_fs_i.h has the definition of the
ext2-specific parts of the in-memory inode. (The on-disk inode is
defined in ext2_fs.h.)

linux/mm/filemap.c is also interesting, though there's no
e2compr-specific code there. Similarly linux/include/linux/mm.h and
linux/include/linux/fs.h.

generic_readpage() is in linux/fs/buffer.c. Also all buffer handling.


The cleanup scheme
~~~~~~~~~~~~~~~~~~

inode->u.ext2_i.i_compr_flags has only a single bit defined:
EXT2_CLEANUP_FL. This bit gets set to 1 to indicate that
ext2_cleanup_compressed_inode() needs to be called.

There is a related flag stored on disk as well as in memory:
EXT2_DIRTY_FL of i_flags. If ext2_cleanup_compressed_inode() couldn't
finish its job (e.g. due to an I/O error) then it clears EXT2_CLEANUP_FL
of i_compr_flags, but leaves EXT2_DIRTY_FL high.

In ext2_read_inode(), if EXT2_DIRTY_FL is high then EXT2_CLEANUP_FL is
raised, in the hope that ,whatever was preventing
ext2_cleanup_compressed_inode() from finishing` is now past.

Except for ext2_read_inode() as noted above, everything that raises
EXT2_CLEANUP_FL (i.e. ext2_write_file(), ext2_ioctl() and
ext2_truncate()) also raises EXT2_DIRTY_FL.

Nothing lowers either EXT2_CLEANUP_FL or EXT2_DIRTY_FL except
ext2_cleanup_compressed_inode() (and one or both of the new_inode and
delete_inode routines).


One feels that at least one of these cleanup flags ought to
disappear. The main use of the persistent EXT2_DIRTY_FL is where the
user does `chattr -c' in order to decompress the file, but there isn't
enough space on the device to do this. We can get rid of this problem
by having ext2_ioctl() call ext2_cleanup_compressed_inode()
try to


Notes on a few variables
~~~~~~~~~~~~~~~~~~~~~~~~

Don't confuse the inode->i_dirt flag with (inode->u.ext2_i.i_flags &
EXT2_DIRTY_FL). See section `The cleanup scheme' above for a
description of EXT2_DIRTY_FL.


inode->u.ext2_i.i_clu_nblocks,
inode->u.ext2_i.i_log2_clu_nblocks:

i_clu_nblocks is always equal to ,1 << i_log2_clu_nblocks` (except
during a couple of cycles while they're being changed; I haven't
consciously tried to avoid problems for SMP machines in this respect).

i_clu_nblocks is the number of blocks per cluster for this inode.

Old information: these variables were previously called
`i_cluster_bits' and `i_cluster_size'. They were in an array:

inode->u.ext2_i.i_cluster_bits[2],
inode->u.ext2_i.i_cluster_size[2]:

I believe the reason these were declared as an array was for the case
where someone changes the cluster size of a file that was already
compressed. (Reason for this belief: All readers of these fields use
[0]. On creation (ialloc), read_inode, and `chattr +c' (where
previously uncompressed), both [0] and [1] are updated. On change
(IOC_SET_CLUSTERSIZE), only [0] is updated.) Since ,changing the
cluster size of an already-compressed file` isn't implemented, I've
renamed them and made them scalars rather than arrays.


inode->u.ext2_i.i_flags: When the e2compr patch is applied, this
variable only holds the low 24 bits of the on-disk i_flags field.
(Without the e2compr patch applied, all 32 bits are available. An
interesting side effect of this is that user programs can access the
compression algorithm and cluster size on kernels without the e2compr
patch by using the EXT2_IOC_GETFLAGS, EXT2_IOC_SETFLAGS ioctls.)


inode->u.ext2_i.i_compr_method: Holds the compression method
identifier. Starting from e2compr-0.4.0, this is different from an
algorithm identifier: an example of a method is gzip9; the
corresponding algorithm is gzip.
See compress.c for where +ext2_method_table and ext2_algorithm_table are defined. ext2_fs.h has +some enumerations for addressing these tables (search for +`EXT2_NONE_METH' and `EXT2_NONE_ALG'). --- linux-3.2-rc5/fs/Kconfig 2011-12-10 00:09:32.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/Kconfig 2011-12-13 14:22:47.826975380 +0100 @@ -7,6 +7,126 @@ menu "File systems" if BLOCK source "fs/ext2/Kconfig" + +config EXT2_COMPRESS + bool "Ext2 file compression (DANGEROUS)" + depends on EXT2_FS && EXPERIMENTAL + select CRYPTO + select CRYPTO_ALGAPI + select CRYPTO_DEFLATE + select ZLIB_INFLATE + select ZLIB_DEFLATE + help + Ext2 file compression allows transparent compression of files on an + ext2 filesystem. Transparent compression means that files are + stored on the disk in a compressed format but they are automatically + decompressed as they are read in and compressed when written out. + The user is in control of how and which files are compressed, using + the `chattr' utility (see chattr(1)). For the sake of safety, + administrative data (superblock, inodes, directories, etc.) are not + compressed. + + Compression is very useful if you're short on disk space, and + provides a better option than having lots of .gz files around. + For more information, see . + + You _need_ to have the special e2compr version of e2fsck to be able + to make use of this. + + If you say Y, you will be asked which compression algorithms you wish + to include. Gzip is a good all-round algorithm, as its 1..9 parameter + allows a good range of speed/compression trade-off. Other noteworthy + algorithms are LZV, which caters better to the faster/less compressing + end of the scale, and bzip, which caters slightly better to the more + compressing but slower end of the scale. + + Ext2 compression is still experimental, so unless you know you need + it, you'd better say N. + +menu "Ext2 file compression options" + depends on EXT2_COMPRESS + +choice + #depends on EXT2_DEFAULT_COMPR_METHOD_GZIP + prompt "Gzip parameter for default compression method" + default EXT2_DEFAULT_COMPR_METHOD_GZIP8 + help + You have selected `gzip' as your default compression algorithm, but + I need to know whether to use `gzip -1', `gzip -9', or somewhere + in between. gzip1 is the least compressing but fastest; gzip9 is the + most compressing and slowest; and the numbers in between have + characteristics in between (though not on a linear scale). + If unsure, say `8'. + +config EXT2_DEFAULT_COMPR_METHOD_GZIP1 + bool "1" +config EXT2_DEFAULT_COMPR_METHOD_GZIP2 + bool "2" +config EXT2_DEFAULT_COMPR_METHOD_GZIP3 + bool "3" +config EXT2_DEFAULT_COMPR_METHOD_GZIP4 + bool "4" +config EXT2_DEFAULT_COMPR_METHOD_GZIP5 + bool "5" +config EXT2_DEFAULT_COMPR_METHOD_GZIP6 + bool "6" +config EXT2_DEFAULT_COMPR_METHOD_GZIP7 + bool "7" +config EXT2_DEFAULT_COMPR_METHOD_GZIP8 + bool "8" +config EXT2_DEFAULT_COMPR_METHOD_GZIP9 + bool "9" + +endchoice + +config GZ_HACK + bool "Exclude .gz files from automatic compression" + depends on EXT2_COMPRESS + default y + help + If you say Y here, then files created with names ending in `.gz' or + `.?gz' or `.bz2' don't inherit the `c' ("compress") attribute from + their parent directory. (However, you can still do `chattr +c FILE' + if you want to try to compress it anyway.) This means that you + don't waste CPU time trying to compress a file that probably can't + be compressed. See fs/ext2/namei.c if you want to add other rules. 
	  If you have any aesthetic sensibilities then you will say N here
	  and try to implement something better. Most people will say Y here.


choice
	depends on EXT2_COMPRESS
	prompt "Default cluster size (in blocks, usually 1KB each)"
	default EXT2_DEFAULT_CLUSTER_BITS_5
	help
	  To make random access to compressed files reasonably fast, the files
	  are compressed in clusters. By default, the clusters will be of the
	  size defined here, but there is a modified version of the chattr
	  utility that can set the cluster size for each file independently.
	  Large clusters usually result in better compression at the cost of
	  being slower.

	  Note that the answer to this question is specified in filesystem
	  blocks rather than in kilobytes, though most filesystems have 1KB
	  blocks anyway. (If you have a filesystem with large blocks then
	  you should know it, but if you want to check then "tune2fs -l
	  /dev/xxx | grep size".) The default is 32 blocks, which is the
	  slowest setting but gives the best compression.

config EXT2_DEFAULT_CLUSTER_BITS_2
	bool "4"
config EXT2_DEFAULT_CLUSTER_BITS_3
	bool "8"
config EXT2_DEFAULT_CLUSTER_BITS_4
	bool "16"
config EXT2_DEFAULT_CLUSTER_BITS_5
	bool "32"

endchoice

endmenu


 source "fs/ext3/Kconfig"
 source "fs/ext4/Kconfig"
--- linux-3.2-rc5/include/linux/ext2_fs_c.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/include/linux/ext2_fs_c.h 2011-12-13 14:22:47.830975497 +0100
@@ -0,0 +1,498 @@
/*
 * Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch
 * (transparent compression code)
 * Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr) - Denis Richard (denis.richard@sxb.bsf.alcatel.fr)
 * Adapted from patch e2compr-0.4.39-patch-2.2.18 .
 */

#ifndef EXT2_FS_C_H
#define EXT2_FS_C_H

#include
#include
#include
#include "../../fs/ext2/ext2.h"

/* EXT2_COMPR_DEBUG enables:
 *  - all assertions
 *  - adler checksum checking
 */
//#undef EXT2_COMPR_DEBUG
#define EXT2_COMPR_DEBUG

#ifdef EXT2_COMPR_DEBUG
# define assert(expr) \
	if (unlikely(!(expr))) { \
		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
		       #expr, __FILE__, __func__, __LINE__); \
	}
#else
# define assert(expr) do {} while (0)
#endif


/* Prove get_cpu/put_cpu correctness by calling might_sleep() or maybe
   schedule(); this will check that we are not atomic at this point. */
#ifdef EXT2_COMPR_DEBUG
#define CHECK_NOT_ATOMIC assert(!in_atomic()); //might_sleep();
#else
#define CHECK_NOT_ATOMIC
#endif


#undef EXT2_COMPR_REPORT
//#define EXT2_COMPR_REPORT
//#define EXT2_COMPR_REPORT_VERBOSE
//#define EXT2_COMPR_REPORT_PUT
//#define EXT2_COMPR_REPORT_FILEOPEN
//#define EXT2_COMPR_REPORT_MUTEX

#ifdef EXT2_COMPR_REPORT
//# define EXT2_COMPR_REPORT_PUT
//# define EXT2_COMPR_REPORT_WA
//# define EXT2_COMPR_REPORT_MUTEX
//# define EXT2_COMPR_REPORT_ALLOC	/* disk allocation etc. */
//# define EXT2_COMPR_REPORT_ALGORITHMS	/* Compression algorithms */
//# define EXT2_COMPR_REPORT_VERBOSE	/* Various things I don't think
//					   useful at the moment. */
//#define EXT2_COMPR_REPORT_VERBOSE_INODE
#endif


#ifdef EXT2_COMPR_DEBUG
#define E2COMPR_VERSION "ext2-compression: e2c-0.4.58-smp-debug (26 August 2011) for kernel 3.1"
#else
#define E2COMPR_VERSION "ext2-compression: e2c-0.4.58-smp-release (26 August 2011) for kernel 3.1"
#endif

#define EXT2_IOC_GETCLUSTERSIZE		_IOR('c', 0, long)
#define EXT2_IOC_SETCLUSTERSIZE		_IOW('c', 0, long)
#define EXT2_IOC_GETCOMPRMETHOD		_IOR('c', 1, long)
#define EXT2_IOC_SETCOMPRMETHOD		_IOW('c', 1, long)
#define EXT2_IOC_GETFIRSTCLUSTERSIZE	_IOR('c', 2, long)
#define EXT2_IOC_RECOGNIZE_COMPRESSED	_IOW('c', 2, long)
#define EXT2_IOC_GETCLUSTERBIT		_IOR('c', 3, long)
#define EXT2_IOC_GETCOMPRRATIO		_IOR('c', 4, long)
/* Don't use _IOW('c', {5,6}, long), as these are used by old
   e2compress binaries as SETCLUSTERBIT and CLRCLUSTERBIT
   respectively. */

/* EXT2_xxxx_ALG is an index into ext2_algorithm_table[] defined in
   fs/ext2/compress.c. */
/* N.B. Don't change these without also changing the table in
   compress.c. Be careful not to break binary compatibility.
   (EXT2_NONE_ALG and EXT2_UNDEF_ALG are safe from binary
   compatibility problems, though, so they can safely be renumbered --
   and indeed probably should be if you do add another algorithm.) */
#define EXT2_LZV1_ALG		0
#define EXT2_LZRW3A_ALG		1
#define EXT2_GZIP_ALG		2
#define EXT2_BZIP2_ALG		3
#define EXT2_LZO_ALG		4
#define EXT2_NONE_ALG		5
#define EXT2_UNDEF_ALG		6
#define EXT2_N_ALGORITHMS	5	/* Count of "real" algorithms. Excludes
					   `none' and `undef'. */

/* EXT2_xxxx_METH is an index into ext2_method_table[] defined in
   fs/ext2/compress.c. */
/* N.B. Don't change these without also changing the table in
   compress.c. */
#define EXT2_LZV1_METH		0
#define EXT2_AUTO_METH		1
#define EXT2_DEFER_METH		2
#define EXT2_NEVER_METH		3
#define EXT2_BZIP2_METH		4
#define EXT2_LZRW3A_METH	8
#define EXT2_LZO1X_1_METH	10
#define EXT2_GZIP_1_METH	16
#define EXT2_GZIP_2_METH	17
#define EXT2_GZIP_3_METH	18
#define EXT2_GZIP_4_METH	19
#define EXT2_GZIP_5_METH	20
#define EXT2_GZIP_6_METH	21
#define EXT2_GZIP_7_METH	22
#define EXT2_GZIP_8_METH	23
#define EXT2_GZIP_9_METH	24

#define EXT2_N_METHODS		32	/* Don't change this unless you know what
					   you're doing. In particular, it's tied
					   to the width of the algorithm field
					   in i_flags. */

/* Note: EXT2_N_ALGORITHMS can't be increased beyond 16 without
   changing the width of the s_algorithms_used field in the in-memory
   superblock. The on-disk s_algorithms_used field is 32 bits long.
   (This is in a state of flux. Currently (1998-02-05) there is no
   distinction: we always use the s_es copy.) */


#define EXT2_MAX_CLUSTER_BYTES		(32*1024)
#define EXT2_LOG2_MAX_CLUSTER_BYTES	(5 + 10)

#define EXT2_COMPRESS_MAGIC_04X		0x9ec7
#define EXT2_MAX_CLUSTER_BLOCKS		32
#define EXT2_MAX_CLUSTER_PAGES		(EXT2_MAX_CLUSTER_BYTES >> PAGE_CACHE_SHIFT)
#define EXT2_ECOMPR			EIO
/* A cluster is considered compressed iff the block number for the
   last block of that cluster is EXT2_COMPRESSED_BLKADDR. If this
   changes then check if there's anywhere that needs a cpu_to_le32()
   conversion. */
#define EXT2_COMPRESSED_BLKADDR		0xffffffff

/* I like these names better. */
#define EXT2_MAX_CLU_NBYTES		EXT2_MAX_CLUSTER_BYTES
#define EXT2_LOG2_MAX_CLU_NBYTES	EXT2_LOG2_MAX_CLUSTER_BYTES
#define EXT2_MAX_CLU_NBLOCKS		EXT2_MAX_CLUSTER_BLOCKS


#ifndef __KERNEL__

/* Cluster head on disk, for e2compr versions before 0.4.0.
   I'm leaving this here so that I may make e2compress able to read
   old-style e2compr files. */
struct ext2_cluster_head_03x {
	__u16 magic;		/* == EXT2_COMPRESS_MAGIC_03X */
	__u16 len;		/* size of uncompressed data */
	__u16 compr_len;	/* size of compressed data */
	__u8 method;		/* compress method */
	__u8 reserved_0;
	__u32 bitmap;		/* block bitmap */
	__u32 reserved_2;	/* 0 or adler32 checksum of
				   _compressed_ data */
};
# define EXT2_COMPRESS_MAGIC_03X 0x8ec7	/* Head magic number
					   for e2compr versions
					   before 0.4.0. */
#endif /* !__KERNEL__ */


#ifdef __KERNEL__
# ifdef CONFIG_EXT2_COMPRESS

//mw
#define CONFIG_EXT2_HAVE_GZIP

/* If defined, compress each cluster as soon as we get to the end of a
   whole cluster, when writing. (If undefined, we wait until
   ext2_release_file() or the like.) */
#define EXT2_COMPRESS_WHEN_CLU

# ifdef CONFIG_EXT2_DEFAULT_COMPR_METHOD_DEFER
# define EXT2_DEFAULT_COMPR_METHOD EXT2_DEFER_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZO)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZO1X_1_METH
# ifndef CONFIG_EXT2_HAVE_LZO
# error "Default algorithm (lzo) is not compiled in."
# endif
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZV1)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZV1_METH
# ifndef CONFIG_EXT2_HAVE_LZV1
# error "Default algorithm (lzv1) is not compiled in."
# endif
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZRW3A)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZRW3A_METH
# ifndef CONFIG_EXT2_HAVE_LZRW3A
# error "Default algorithm (lzrw3a) is not compiled in."
# endif
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP1)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_1_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP2)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_2_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP3)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_3_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP4)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_4_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP5)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_5_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP6)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_6_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP7)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_7_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP8)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_8_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP9)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_9_METH
# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_BZIP2)
# define EXT2_DEFAULT_COMPR_METHOD EXT2_BZIP2_METH
# ifndef CONFIG_EXT2_HAVE_BZIP2
# error "Default algorithm (bzip2) is not compiled in."
# endif
# else
# error "No default compression algorithm."
# endif
# if EXT2_DEFAULT_COMPR_METHOD >= EXT2_GZIP_1_METH && EXT2_DEFAULT_COMPR_METHOD <= EXT2_GZIP_9_METH
# ifndef CONFIG_EXT2_HAVE_GZIP
# error "Default algorithm (gzip) is not compiled in."
+# endif +# endif + +# if defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_2) +# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 2 +# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_3) +# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 3 +# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_4) +# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 4 +# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_5) +# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 5 +# else +# error "No default cluster size." +# endif + +# define EXT2_DEFAULT_CLU_NBLOCKS (1 << EXT2_DEFAULT_LOG2_CLU_NBLOCKS) + +# if (EXT2_LZV1_ALG != 0) || (EXT2_BZIP2_ALG != 3) || (EXT2_LZO_ALG != 4) || (EXT2_N_ALGORITHMS != 5) +# error "this code needs changing; but then, you shouldn't be messing with algorithm ids anyway unless you are very careful to protect disk format compatibility" +# endif +# ifdef CONFIG_EXT2_HAVE_LZV1 +# define _ext2_lzv1_builtin (1 << EXT2_LZV1_ALG) +# else +# define _ext2_lzv1_builtin 0 +# endif +# ifdef CONFIG_EXT2_HAVE_LZRW3A +# define _ext2_lzrw3a_builtin (1 << EXT2_LZRW3A_ALG) +# else +# define _ext2_lzrw3a_builtin 0 +# endif +# ifdef CONFIG_EXT2_HAVE_GZIP +# define _ext2_gzip_builtin (1 << EXT2_GZIP_ALG) +# else +# define _ext2_gzip_builtin 0 +# endif +# ifdef CONFIG_EXT2_HAVE_BZIP2 +# define _ext2_bzip2_builtin (1 << EXT2_BZIP2_ALG) +# else +# define _ext2_bzip2_builtin 0 +# endif +# ifdef CONFIG_EXT2_HAVE_LZO +# define _ext2_lzo_builtin (1 << EXT2_LZO_ALG) +# else +# define _ext2_lzo_builtin 0 +# endif + +# ifdef CONFIG_EXT2_HAVE_LZV1_MODULE +# define _ext2_lzv1_module (1 << EXT2_LZV1_ALG) +# else +# define _ext2_lzv1_module 0 +# endif +# ifdef CONFIG_EXT2_HAVE_LZRW3A_MODULE +# define _ext2_lzrw3a_module (1 << EXT2_LZRW3A_ALG) +# else +# define _ext2_lzrw3a_module 0 +# endif +# ifdef CONFIG_EXT2_HAVE_GZIP_MODULE +# define _ext2_gzip_module (1 << EXT2_GZIP_ALG) +# else +# define _ext2_gzip_module 0 +# endif +# ifdef CONFIG_EXT2_HAVE_BZIP2_MODULE +# define _ext2_bzip2_module (1 << EXT2_BZIP2_ALG) +# else +# define _ext2_bzip2_module 0 +# endif +# ifdef CONFIG_EXT2_HAVE_LZO_MODULE +# define _ext2_lzo_module (1 << EXT2_LZO_ALG) +# else +# define _ext2_lzo_module 0 +# endif + +# define EXT2_ALGORITHMS_MODULE (_ext2_lzv1_module | _ext2_lzrw3a_module | _ext2_gzip_module | _ext2_bzip2_module | _ext2_lzo_module) +# define EXT2_ALGORITHMS_BUILTIN (_ext2_lzv1_builtin | _ext2_lzrw3a_builtin | _ext2_gzip_builtin | _ext2_bzip2_builtin | _ext2_lzo_builtin) + +# if EXT2_ALGORITHMS_MODULE & EXT2_ALGORITHMS_BUILTIN +# error "Arithmetic error? Some algorithm appears to be both built-in and a module." +# endif + +/* EXT2_ALGORITHMS_SUPP is what we test when mounting a filesystem. + See fs/ext2/super.c. */ +# define EXT2_ALGORITHMS_SUPP (EXT2_ALGORITHMS_MODULE | EXT2_ALGORITHMS_BUILTIN) +# if EXT2_ALGORITHMS_SUPP == 0 +# error "You must select at least one compression algorithm." +# endif + +/* Cluster head on disk. Little-endian. */ +struct ext2_cluster_head { + __u16 magic; /* == EXT2_COMPRESS_MAGIC_04X. */ + __u8 method; /* compression method id. */ + __u8 holemap_nbytes; /* length of holemap[] array */ + __u32 checksum; /* adler32 checksum. Checksum covers all fields + below this one, and the compressed data. */ + __u32 ulen; /* size of uncompressed data */ + __u32 clen; /* size of compressed data (excluding cluster head) */ + __u8 holemap[0]; /* bitmap describing where to put holes. */ +}; + + +struct ext2_wa_S { + __u8 u[EXT2_MAX_CLUSTER_BYTES]; /* Uncompressed data. */ + __u8 c[EXT2_MAX_CLUSTER_BYTES]; /* Compressed data. 
*/ + __u8 heap[1]; /* Heap: working space for de/compression routines. */ +}; + +# define EXT2_CLEANUP_FL 0x40 /* See Readme.e2compr */ +# define EXT2_OSYNC_INODE 0x20 /* sync of inode running */ +# define ROUNDUP_DIV(_n, _d) ((_n) ? 1 + (((_n) - 1) / (_d)) : 0) +# define ROUNDUP_RSHIFT(_n, _b) ((_n) ? 1 + (((_n) - 1) >> (_b)) : 0) + +# if defined(EXT2_NDIR_BLOCKS) && (EXT2_NDIR_BLOCKS != 12) +# error "e2compr currently assumes that EXT2_NDIR_BLOCKS is 12." +/* If EXT2_NDIR_BLOCKS changes then change the definitions of + ext2_first_cluster_nblocks() and friends, and search the patch for + anywhere where 12 is hard-coded. (At the time of writing, it's + only hard-coded in ext2_first_cluster_nblocks().) What we want to + achieve is for clusters not to straddle address blocks. Apart from + performance, some code in compress.c (search for `straddle') + assumes this. */ +# endif + +# include + +# define EXT2_ALG_INIT_COMPRESS 1 +# define EXT2_ALG_INIT_DECOMPRESS 2 + +extern int ext2_get_cluster_pages (struct inode*, u32, struct page**, struct page *, int); +extern int ext2_get_cluster_extra_pages (struct inode*, u32, struct page**, struct page**); +extern int ext2_kmap_cluster_pages (struct page *, struct page**, struct page**); +extern int ext2_kunmap_cluster_pages (struct page *, struct page**, struct page**); +extern int ext2_get_cluster_blocks (struct inode*, u32, struct buffer_head**, struct page**, struct page**, int); +extern int ext2_decompress_cluster (struct inode*, u32); +extern int ext2_decompress_pages(struct inode*, u32, struct page**); +extern int ext2_compress_cluster (struct inode*, u32); +extern int ext2_decompress_inode (struct inode*); +extern int ext2_cleanup_compressed_inode (struct inode*); +extern void ext2_update_comprblk (struct inode *); +extern int ext2_get_dcount(struct inode *inode); + +extern size_t ext2_decompress_blocks (struct inode*, struct buffer_head**, int, size_t, u32 cluster); +extern int ext2_count_blocks (struct inode*); +extern int ext2_recognize_compressed (struct inode *, unsigned cluster); +extern unsigned long ext2_adler32 (unsigned long, unsigned char*, int); + +extern size_t ext2_iLZV1 (int); +extern size_t ext2_iLZV2 (int); +extern size_t ext2_iNONE (int); +extern size_t ext2_iGZIP (int); +extern size_t ext2_iBZIP2 (int); +extern size_t ext2_iLZO (int); +extern size_t ext2_iLZRW3A (int); +extern size_t ext2_iZLIB (int); + +extern size_t ext2_wLZV1 (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_wLZV2 (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_wNONE (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_wGZIP (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_wBZIP2 (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_wLZO (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_wLZRW3A (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_wZLIB (__u8*, __u8*, void*, size_t, size_t, int); + +extern size_t ext2_rLZV1 (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_rLZV2 (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_rNONE (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_rGZIP (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_rBZIP2 (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_rLZO (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_rLZRW3A (__u8*, __u8*, void*, size_t, size_t, int); +extern size_t ext2_rZLIB (__u8*, __u8*, void*, size_t, size_t, int); + +struct 
ext2_algorithm { + char *name; + int avail; + size_t (*init) (int); + size_t (*compress) (__u8*, __u8*, void*, size_t, size_t, int); + size_t (*decompress) (__u8*, __u8*, void*, size_t, size_t, int); +}; + +struct ext2_method { + unsigned alg; + int xarg; +}; + + +# define ext2_first_cluster_nblocks(_i) ((EXT2_I(_i))->i_clu_nblocks > 4 && (_i)->i_sb->s_blocksize < 4096 ? 12 : 4) +# define ext2_block_to_cluster(_i,_b) ((_b) < ext2_first_cluster_nblocks(_i) ? 0 : (((_b) - ext2_first_cluster_nblocks(_i)) >> (EXT2_I(_i))->i_log2_clu_nblocks) + 1) +# define ext2_offset_to_cluster(_i,_o) ext2_block_to_cluster((_i), ((_o) >> (_i)->i_sb->s_blocksize_bits)) +# define ext2_n_clusters(_i) ((_i)->i_size ? ext2_offset_to_cluster((_i), (_i)->i_size - 1) + 1 : 0) +# define ext2_cluster_block0(_i,_c) ((_c) ? ext2_first_cluster_nblocks(_i) + (((_c) - 1) << (EXT2_I(_i))->i_log2_clu_nblocks) : 0) +# define ext2_cluster_nblocks(_i,_c) ((_c) ? (EXT2_I(_i))->i_clu_nblocks : ext2_first_cluster_nblocks(_i)) +# define ext2_cluster_offset(_i,_c) ((_c) ? ext2_cluster_block0((_i), (_c)) << (_i)->i_sb->s_blocksize_bits : 0) + +# define ext2_first_cluster_npages(_i) ((EXT2_I(_i))->i_clu_nblocks > 4 && (_i)->i_sb->s_blocksize < 4096 ? 12 >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : 4 >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits)) +# define ext2_page_to_cluster(_i,_p) ((_p) < ext2_first_cluster_npages(_i) ? 0 : (((_p) - ext2_first_cluster_npages(_i)) >> (((EXT2_I(_i))->i_log2_clu_nblocks)+(_i)->i_sb->s_blocksize_bits-PAGE_CACHE_SHIFT)) + 1) +# define ext2_cluster_page0(_i,_c) ((_c) ? ext2_cluster_block0(_i, _c) >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : 0) +# define ext2_cluster_npages(_i,_c) ((_c) ? (EXT2_I(_i))->i_clu_nblocks >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : ext2_first_cluster_npages(_i)) + +static inline int +ext2_offset_is_clu_boundary(struct inode *inode, u32 off) +{ + if (off & (inode->i_sb->s_blocksize - 1)) + return 0; + if (off == 0) + return 1; + off >>= inode->i_sb->s_blocksize_bits; + if (off < ext2_first_cluster_nblocks(inode)) + return 0; + off -= ext2_first_cluster_nblocks(inode); + return !(off & (EXT2_I(inode)->i_clu_nblocks - 1)); +} + +struct ext2_wa_contents_S { + ino_t ino; + dev_t dev; + unsigned cluster; +}; + +DECLARE_PER_CPU(struct ext2_wa_S *, ext2_rd_wa); +DECLARE_PER_CPU(struct ext2_wa_S *, ext2_wr_wa); + +extern void ext2_alloc_rd_wa(void); +extern void ext2_alloc_wr_wa(void); + +extern struct ext2_algorithm ext2_algorithm_table[]; +extern struct ext2_method ext2_method_table[]; /*mw: is static so far, no writes*/ + +/* Both of these return -errno if error, 0 if not compressed, positive + if compressed. (You should use the macro unless you've already + tested COMPRBLK.) */ +extern int ext2_cluster_is_compressed_fn (struct inode *inode, __u32 cluster); +static inline int ext2_cluster_is_compressed (struct inode *inode, __u32 cluster) +{ + if ((EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL) == 0) + return 0; + return ext2_cluster_is_compressed_fn (inode, cluster); +} +extern unsigned ext2_calc_free_ix (unsigned , u8 const *, unsigned ); +extern int ext2_unpack_blkaddrs(struct inode *, struct buffer_head **, int, unsigned , u8 const *, unsigned , unsigned , unsigned , unsigned ); + +# define HOLE_BLKADDR(_b) \ + (((_b) == 0) \ + || ((_b) == EXT2_COMPRESSED_BLKADDR)) +# else /* !CONFIG_EXT2_COMPRESS */ +# define HOLE_BLKADDR(_b) ((_b) == 0) +# endif + +/* For some reason or other, I see code like `if (le32_to_cpu(tmp) != + 0)' around in the kernel. 
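(Dropping the swab cannot change the outcome here, since swab32(x) == 0 exactly when x == 0; only the generated code is at stake.)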
So far I haven't checked whether or not + the compiler knows that the swab can be dropped. */ +# if defined(EXT2_COMPRESSED_BLKADDR) && EXT2_COMPRESSED_BLKADDR != 0xffffffff +/* This may be a false positive; the "correct" test would be `if + defined(CONFIG_EXT2_COMPRESS)', but if this test does succeed, then + there is at least cause to have a look around. */ +# error "Next bit of code is wrong." +# endif + +# define HOLE_BLKADDR_SWAB32(_b) HOLE_BLKADDR(_b) + +#ifdef EXT2_COMPR_REPORT +#define trace_e2c(format, args...) printk(KERN_DEBUG format, ## args) +#else +#define trace_e2c(format, args...) do {} while(0) +#endif + +#endif /* __KERNEL__ */ + + +#endif /* EXT2_FS_C_H */ --- linux-3.2-rc5/fs/ext2/Makefile 2011-12-10 00:09:32.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/Makefile 2011-12-13 14:22:47.830975498 +0100 @@ -2,10 +2,17 @@ # Makefile for the linux ext2-filesystem routines. # +ifeq ($(CONFIG_EXT2_COMPRESS),y) + +COMPRESS_STUFF := adler32.o compress.o e2zlib.o\ + $($(obj-y):%/=%/ext2-compr-%.o) +endif + obj-$(CONFIG_EXT2_FS) += ext2.o ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o + ioctl.o namei.o super.o symlink.o $(COMPRESS_STUFF) + ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o --- linux-3.2-rc5/fs/ext2/compress.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/compress.c 2011-12-13 14:22:47.839975781 +0100 @@ -0,0 +1,3420 @@ +/* + * linux/fs/ext2/compress.c + * + * Copyright (C) 1995 Antoine Dumesnil de Maricourt (dumesnil@etca.fr) + * (transparent compression code) + */ + +/* + * Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch FRANCE + * + * Transparent compression code for 2.4 kernel. + * + * Denis Richard (denis.richard@sxb.bsf.alcatel.fr) + * Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr) + * + * Adapted from patch e2compr-0.4.39-patch-2.2.18 . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) + +#ifdef CONFIG_HIGHMEM +#define restore_b_data_himem(bh) assert(page_address(bh->b_page)); bh->b_data = page_address(bh->b_page) + bh_offset(bh) + + + +int ext2_kmap_cluster_pages(struct page *page, struct page *pg[], + struct page *epg[]) +{ + int i = 0; + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (!pg[i]) + break; + if (epg && epg[i]) + kmap(epg[i]); + else + kmap(pg[i]); + } + + if (page) + kmap(page); + return 0; +} + + +int ext2_kunmap_cluster_pages(struct page *page, struct page *pg[], + struct page *epg[]) +{ + int i = 0; + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (!pg[i]) + break; + if (epg && epg[i]) + kunmap(epg[i]); + else + kunmap(pg[i]); + } + + if (page) + kunmap(page); + return 0; +} +#else //no high-mem: +#define restore_b_data_himem(bh) ; +#endif + + +/*none compression dummy functions*/ +size_t ext2_iNONE (int action) { return 0; } +size_t ext2_wNONE (__u8 *ibuf, __u8 *obuf, void *wa, size_t ilen, size_t olen, int xarg) { return 0; } +size_t ext2_rNONE (__u8 *ibuf, __u8 *obuf, void *wa, size_t ilen, size_t olen, int xarg) { return 0; } + +/* + * Algorithm and method tables + */ +struct ext2_algorithm ext2_algorithm_table[] = { + /* Note: all algorithms must have the `name' field filled in. + This is used to autoload algorithm modules (ext2-compr-%s), and + in kernel printk. */ + /* N.B. Do not renumber these algorithms! (To do so is to change + the binary format.) It's OK for `none' and `undef' to be + renumbered, though. */ + + /* Fields: + name; available; routines for: + init, compress, decompress. */ + {"lzv1", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, + {"lzrw3a", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, + {"gzip", 1, ext2_iZLIB, ext2_wZLIB, ext2_rZLIB}, //Andreas: workaround + {"bzip2", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, + {"lzo", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, + {"none", 1, ext2_iNONE, ext2_wNONE, ext2_rNONE}, + + /* This "algorithm" is for unused entries in the method table. + It differs from EXT2_NONE_ALG in that it is considered + unavailable, whereas `none' is always available. */ + {"undef", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, + +}; + +/* Note: EXT2_N_ALGORITHMS can't be increased beyond 16 without + changing the width of the s_algorithms_used field in the in-memory + superblock. The on-disk s_algorithms_used field is 32 bits long. + (This is in a state of flux. Currently (1998-02-05) there is no + distinction: we always use the s_es copy. */ + +/* The size of this table must be 32 to prevent Oopsen from + invalid data. We index this from 5 bits of i_flags, so + the size is (1 << 5) == 32. */ +struct ext2_method ext2_method_table[32] = { + /* Fields: algorithm id, algorithm argument. 
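E.g. entries 16..24 below all map to EXT2_GZIP_ALG, with the argument giving the gzip level 1..9, while entries 1..3 (`auto', `defer', `never') map to EXT2_NONE_ALG.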
*/ + {EXT2_LZV1_ALG, 0}, + {EXT2_NONE_ALG, 0}, /* 1: auto */ + {EXT2_NONE_ALG, 0}, /* 2: defer */ + {EXT2_NONE_ALG, 0}, /* 3: never */ + {EXT2_BZIP2_ALG, 0}, /* 4: bzip2 */ + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_LZRW3A_ALG, 0}, /* 8: lzrw3a */ + {EXT2_UNDEF_ALG, 0}, + {EXT2_LZO_ALG, 0}, /* 10: lzo1x_1 */ + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_GZIP_ALG, 1}, /* 16 */ + {EXT2_GZIP_ALG, 2}, + {EXT2_GZIP_ALG, 3}, + {EXT2_GZIP_ALG, 4}, + {EXT2_GZIP_ALG, 5}, + {EXT2_GZIP_ALG, 6}, + {EXT2_GZIP_ALG, 7}, + {EXT2_GZIP_ALG, 8}, + {EXT2_GZIP_ALG, 9}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0}, + {EXT2_UNDEF_ALG, 0} +}; + + +static void ext2_mark_algorithm_use(struct inode *inode, unsigned alg) +{ + struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb); + + /* Hopefully, lock_super() isn't needed here, as we don't + block in the critical region. True? */ + assert(alg < EXT2_N_ALGORITHMS); + if (sbi->s_es->s_feature_incompat + & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)) { + sbi->s_es->s_algorithm_usage_bitmap |= cpu_to_le32(1 << alg); + } else { + struct ext2_super_block *es = sbi->s_es; + + es->s_algorithm_usage_bitmap = cpu_to_le32(1 << alg); + es->s_feature_incompat + |= cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION); + if (es->s_rev_level < EXT2_DYNAMIC_REV) { + /* Raise the filesystem revision level to + EXT2_DYNAMIC_REV so that s_feature_incompat + is honoured (except in ancient kernels / + e2fsprogs). We must also initialize two + other dynamic-rev fields. The remaining + fields are assumed to be already correct + (e.g. still zeroed). */ + es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV); + es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO); + es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE); + } + } + mark_buffer_dirty(sbi->s_sbh); +} + + +/* Displays an error message if algorithm ,alg` is not marked in use, + and then marks it in use. 
*/ +static void ext2_ensure_algorithm_use(struct inode *inode, unsigned alg) +{ + assert(alg < EXT2_N_ALGORITHMS); + + if (!(EXT2_SB(inode->i_sb)->s_es->s_algorithm_usage_bitmap + & cpu_to_le32(1 << alg))) { + ext2_msg(inode->i_sb, "algorithm usage bitmap algorithm %s not marked used in inode %lu", + ext2_algorithm_table[alg].name, inode->i_ino); + ext2_mark_algorithm_use(inode, alg); + } +} + + +/*mw: out of cache bug fix 5-16-07 */ +static void create_empty_buffers_e2c(struct page *page, + unsigned long blocksize, + unsigned long b_state, + struct inode *inode) +{ + struct buffer_head *bh, *head, *tail; + + head = alloc_page_buffers(page, blocksize, 1); + bh = head; + do { + bh->b_state |= b_state; + tail = bh; + bh->b_bdev = NULL; //mw: make it like 2.4 + bh->b_blocknr = 0; //mw: make it like 2.4 + bh->b_end_io = NULL; //mw: make it like 2.4 + bh = bh->b_this_page; + } while (bh); + tail->b_this_page = head; + spin_lock(&inode->i_mapping->private_lock); + if (PageUptodate(page) || PageDirty(page)) { + bh = head; + do { + if (PageDirty(page)) + set_buffer_dirty(bh); + if (PageUptodate(page)) + set_buffer_uptodate(bh); + bh = bh->b_this_page; + } while (bh != head); + } + attach_page_buffers(page, head); + spin_unlock(&inode->i_mapping->private_lock); +} + +int ext2_get_cluster_pages(struct inode *inode, u32 cluster, + struct page *pg[], struct page *page, int compr) +{ + int nbpg, npg, i; + u32 page0; /* = position within file (not position within fs). */ + u32 idx = 0; + struct page *cached_page; + struct pagevec lru_pvec; + + /*mw */ + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) + pg[i] = NULL; + + cached_page = NULL; + pagevec_init(&lru_pvec, 0); + + page0 = ext2_cluster_page0(inode, cluster); + nbpg = ext2_cluster_npages(inode, cluster); + + if (compr && (((page0 + nbpg) << PAGE_CACHE_SHIFT) > inode->i_size)) + nbpg = ((inode->i_size - 1) >> PAGE_CACHE_SHIFT) - page0 + 1; +#ifdef EXT2_COMPR_REPORT + trace_e2c("ext2_get_cluster_pages: page0=%d, nbpg=%d page=%ld\n", + page0, nbpg, ((page != NULL) ? 
page->index : 0));
+#endif
+	for (npg = 0; npg < nbpg; npg++) {
+		if ((page == NULL) || ((page0 + npg) != page->index)) {
+			//pg[npg] = __grab_cache_page(inode->i_mapping, page0+npg); /* &cached_page, &lru_pvec);*/
+			pg[npg] = grab_cache_page_write_begin(inode->i_mapping, page0+npg, 0);
+			if (!pg[npg])
+				goto error;
+		} else {
+			pg[npg] = page;
+		}
+		if (!page_has_buffers(pg[npg])) {
+			ClearPageUptodate(pg[npg]);
+			ClearPageDirty(pg[npg]);
+			create_empty_buffers_e2c(pg[npg], inode->i_sb->s_blocksize, 0, inode);
+			if (unlikely(!page_has_buffers(pg[npg])))
+				trace_e2c("ext2_get_cluster_pages: NOMEM!\n");
+			assert(!PageUptodate(pg[npg]));
+			assert(!PageDirty(pg[npg]));
+		}
+	}
+	//set remaining pages to NULL
+	for (idx = npg; idx < EXT2_MAX_CLUSTER_PAGES; idx++)
+		pg[idx] = NULL;
+
+	if (cached_page)
+		page_cache_release(cached_page);
+	pagevec_lru_add_file(&lru_pvec);
+	pagevec_free(&lru_pvec);
+	return (npg);
+      error:
+	if (cached_page)
+		page_cache_release(cached_page);
+	pagevec_lru_add_file(&lru_pvec);
+	pagevec_free(&lru_pvec);
+	while (--npg >= 0) {
+		if ((page == NULL) || ((page0 + npg) != page->index)) {
+			unlock_page(pg[npg]);
+			page_cache_release(pg[npg]);
+		}
+		pg[npg] = NULL;
+	}
+	trace_e2c("ext2_get_cluster_pages: error no page\n");
+	return (-ENOMEM);
+}
+
+
+int ext2_get_cluster_extra_pages(struct inode *inode, u32 cluster,
+				 struct page *pg[], struct page *epg[])
+{
+	struct page *page;
+	int nbpg, npg, i;
+
+	for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++)
+		epg[i] = NULL;
+
+	nbpg = ext2_cluster_npages(inode, cluster);
+	for (npg = 0; npg < nbpg; npg++) {
+		if (pg[npg] == NULL)
+			break;
+		if (PageUptodate(pg[npg])) {
+			//page = page_cache_alloc(inode->i_mapping);
+			//mw: has gfp-mask of address-space: gfp_t mapping_gfp_mask(struct address_space * mapping)
+			//    must not trigger shrink_dcache_memory(), which might call ext2_cleanup_compressed_inode with the SAME mutex.
+			page = __page_cache_alloc(GFP_NOFS);
+
+			if (!page) {
+				goto error;
+			}
+			ClearPageError(page);
+			ClearPageReferenced(page);
+			ClearPageUptodate(page);
+			ClearPageDirty(page);
+			lock_page(page);
+			page->index = pg[npg]->index;
+
+			if (!page_has_buffers(page)) {
+				create_empty_buffers_e2c(page, inode->i_sb->s_blocksize, 0,
+							 inode);
+				/*mw: only the "extra pages" for decompression get their buffers
+				 * from create_empty_buffers_e2c, because they have no mapping
+				 * context and they must not have one.  Otherwise they would need
+				 * a page->index, which always belongs to an address_space object
+				 * (e.g. an inode).  But that is not intended here: we just need
+				 * their buffers for the short time of decompression. */
+				if (unlikely(!page_has_buffers(page)))
+					return printk("Error: NOMEM!\n");
+			}
+
+			epg[npg] = page;
+#ifdef EXT2_COMPR_REPORT
+			trace_e2c
+			    ("ext2_get_cluster_extra_pages: allocated page idx=%ld\n",
+			     pg[npg]->index);
+#endif
+		} else {
+			epg[npg] = NULL;
+		}
+	}
+	return (npg);
+      error:
+	while (--npg >= 0)
+		if (epg[npg]) {
+			ClearPageDirty(epg[npg]);
+			ClearPageUptodate(epg[npg]);
+			try_to_free_buffers(epg[npg]);
+			unlock_page(epg[npg]);
+			assert(page_count(epg[npg]) == 1);
+			page_cache_release(epg[npg]);
+		}
+	trace_e2c("ext2_get_cluster_extra_pages: error no page\n");
+	return (-ENOMEM);
+
+}
+
+/* Read every block in the cluster.  The blocks are stored in the bh
+   array, which must be big enough.
+
+   Returns the number of blocks contained in the cluster, or -errno if an
+   error occurred.  The buffers should be released by the caller
+   (unless an error occurred).
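+   The bh[] array must provide EXT2_MAX_CLUSTER_BLOCKS slots; the
+   function asserts that at most EXT2_MAX_CLU_NBLOCKS of them are
+   actually used.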
+
+   The inode must be locked, otherwise it is possible that we return
+   some out of date blocks.
+
+   Called by :
+
+	ext2_decompress_cluster() [i_sem]
+	ext2_compress_cluster() [i_sem]
+	ext2_readpage() [i_sem] */
+
+
+int ext2_get_cluster_blocks(struct inode *inode, u32 cluster,
+			    struct buffer_head *bh[], struct page *pg[],
+			    struct page *epg[], int compr)
+{
+	struct buffer_head *br[EXT2_MAX_CLUSTER_BLOCKS];
+	int nreq, nbh = 0, npg, i;
+	u32 clu_nblocks;
+	int err;
+	const int blocks = PAGE_CACHE_SIZE >> inode->i_sb->s_blocksize_bits;
+
+	/*mw */
+	for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++)
+		bh[i] = NULL;
+
+	assert(atomic_read(&inode->i_mutex.count) <= 0);	/* i.e. mutex_lock */
+
+	/*
+	 * Request full cluster.
+	 */
+	{
+		u32 endblk;
+		u32 block;	/* = position within file (not position within fs). */
+		u32 nbpg;
+		u32 page0;	/* = position within file (not position within fs). */
+		u32 idx;
+
+		block = ext2_cluster_block0(inode, cluster);
+		clu_nblocks = ext2_cluster_nblocks(inode, cluster);
+		/* impl: Don't shorten endblk for i_size.  The
+		   remaining blocks should be NULL anyway, except in
+		   the case when called from ext2_decompress_cluster
+		   from ext2_truncate, in which case i_size is short
+		   and we _want_ to get all of the blocks. */
+		endblk = block + clu_nblocks;
+
+		page0 = ext2_cluster_page0(inode, cluster);
+		nbpg = ext2_cluster_npages(inode, cluster);
+
+		if (compr
+		    && (((page0 + nbpg) << PAGE_CACHE_SHIFT) > inode->i_size)) {
+			nbpg = ((inode->i_size - 1) >> PAGE_CACHE_SHIFT) - page0 + 1;
+			endblk =
+			    block +
+			    (nbpg <<
+			     (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits));
+		}
+
+		idx = page0 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+#ifdef EXT2_COMPR_REPORT
+		trace_e2c("ext2_get_cluster_blocks: page0=%d, nbpg=%d\n", page0,
+			  nbpg);
+#endif
+		for (npg = 0; npg < nbpg; npg++) {
+			struct buffer_head *buffer;
+
+			if ((epg != NULL) && (epg[npg] != NULL))
+				buffer = page_buffers(epg[npg]);
+			else
+				buffer = page_buffers(pg[npg]);
+			for (i = 0; i < blocks && (block + nbh) < endblk;
+			     buffer = buffer->b_this_page, i++) {
+				if (idx == (block + nbh)) {
+					bh[nbh] = buffer;
+					nbh++;
+				}
+				idx++;
+			}
+		}
+#ifdef EXT2_COMPR_REPORT
+		trace_e2c
+		    ("ext2_get_cluster_blocks: got all pages and %d buffers\n",
+		     nbh);
+#endif
+
+		for (nbh = 0, nreq = 0; block < endblk; nbh++) {
+			assert(bh[nbh] != NULL);
+			bh[nbh]->b_blocknr = 0;
+			clear_bit(BH_Mapped, &bh[nbh]->b_state);
+
+			//mw: does not work with 2.6 and holes!!!
+			//err=ext2_get_block(inode, block++, bh[nbh], (PageDirty(bh[nbh]->b_page) ? 1 : 0));
+			err = ext2_get_block(inode, block++, bh[nbh], 0);
+			/* mw: 0: we don't create non-existing blocks here;
+			 * let's do it just before the writeback, when we know which blocks we really need... */
+			//err=ext2_get_block(inode, block++, bh[nbh], (buffer_dirty(bh[nbh]) ? 1 : 0));
+
+			/* mw: bdev-bug-fix: for files which got compressed and now consume fewer buffers,
+			 * ext2_get_block returns 0 for an empty block.  As these buffers were used before,
+			 * the bh[nbh]->b_bdev might be != NULL or just invalid.  So we set them explicitly
+			 * to NULL. */
+			//printk("Get Block cluster %i: (%#x):%i Blk-NR:%lu(%lu)[%lu-%lu] Bdev:%#x(%#x), PGDirty:%i, mapped:%i, PID: %lu\n", cluster, bh[nbh], nbh, block,
+
+			//if we are not mapped, then the blocknr will be wrong;
+			//if we set a bdev here then we will write to some "random" block
+			if (!buffer_mapped(bh[nbh])) {
+				bh[nbh]->b_bdev = NULL;	/* don't write wrongly mapped blocks !!! */
+				/* mw: if you encounter a null pointer oops, you MUST
+				 * map your buffer using ext2_get_block() */
+			}
+
+			if (bh[nbh]->b_blocknr != 0) {
+				if (!buffer_uptodate(bh[nbh])
+				    /* TODO: Do we need this
+				       `!buffer_locked' test? */
+				    && !buffer_locked(bh[nbh])
+				    && !PageDirty(bh[nbh]->b_page))
+					br[nreq++] = bh[nbh];
+			} else if ((err != 0)
+				   && (err != -EFBIG))
+				/* impl: for some unknown reason,
+				   ext2_getblk() returns -EFBIG if
+				   !create and there's a hole.  ==> not right any more in 2.4 */
+				goto error;
+		}
+		for (i = nbh; i < EXT2_MAX_CLUSTER_BLOCKS; i++) {
+			bh[i] = NULL;
+		}
+	}
+#ifdef EXT2_COMPR_REPORT_CPR
+	trace_e2c("ext2_get_cluster_blocks: nreq=%d for cluster=%d\n", nreq,
+		  cluster);
+#endif
+
+	//read all blocks, which are not null-blocks
+	if (nreq > 0)
+		ll_rw_block(READ, nreq, br);
+
+	/*
+	 * Adjust nbh if we have some null blocks at end of cluster.
+	 */
+	while ((nbh != 0) && (bh[nbh - 1]->b_blocknr == 0))
+		nbh--;
+
+	/*
+	 * Wait for blocks.
+	 */
+	err = -EIO;
+	CHECK_NOT_ATOMIC
+	for (i = 0; i < nbh; i++)
+		if ((!PageDirty(bh[i]->b_page)) && (bh[i]->b_blocknr != 0)) {
+			wait_on_buffer(bh[i]);
+			if (!buffer_uptodate(bh[i])) {	/* Read error ??? */
+				trace_e2c
+				    ("ext2_get_cluster_blocks: wait_on_buffer error (blocknr=%ld)\n",
+				     bh[i]->b_blocknr);
+				goto error;
+			}
+		}
+	assert(nbh <= EXT2_MAX_CLU_NBLOCKS);
+
+	return nbh;
+
+      error:
+	printk("ERROR: ext2_get_cluster_blocks()\n");
+	return err;
+}
+
+
+/* Iterations over blocks in the inode are done with a generic
+   iteration key mechanism.  We need one method to convert a block
+   number into a new key, one method to iterate (i.e., increment the
+   key) and one method to free the key.  The code could be shared with
+   truncate.c, as this mechanism is very general.
+
+   This code assumes that nobody else can read or write the file
+   between ext2_get_key() and ext2_free_key(), so callers need to have
+   i_sem (which they all do anyway). */
+
+/* TODO: Get all of the bkey routines to return -errno instead of
+   true/false. */
+/* TODO: The bkey routines currently assume that address blocks are
+   allocated even if all contained addresses are NULL, but this is not
+   true.  Make sure that we differentiate between NULL block and error,
+   and then fix up ext2_set_key_blkaddr() and anything else (including
+   the pack/unpack routines). */
+struct ext2_bkey {
+	int level;
+	u32 block;
+	struct inode *inode;
+	int off[4];
+	u32 *ptr[4];
+	struct buffer_head *ibh[4];
+};
+
+
+/*
+ * Method to convert a block number into a key.
+ *
+ * Returns 1 on success, 0 on failure.  You may safely, but need
+ * not, free the key even if ext2_get_key() fails.
+ */
+static int ext2_get_key(struct ext2_bkey *key, struct inode *inode,
+			u32 block)
+{
+	int x, level;
+	int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+
+	assert(atomic_read(&inode->i_mutex.count) <= 0);
+
+	/*
+	 * The first step can be viewed as translating the
+	 * original block number in a special base (powers
+	 * of addr_per_block).
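+	 *
+	 * For instance, with 1KB blocks (addr_per_block == 256),
+	 * block 300 lands in the double-indirect tree: 300 - 12 - 256
+	 * == 32, so off[0] == EXT2_DIND_BLOCK, off[1] == 32 / 256 == 0,
+	 * off[2] == 32 % 256 == 32, and level == 2.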
+	 */
+
+	key->block = block;
+
+	key->off[0] = key->off[1] = key->off[2] = key->off[3] = 0;
+	key->ibh[0] = key->ibh[1] = key->ibh[2] = key->ibh[3] = NULL;
+	key->ptr[0] = key->ptr[1] = key->ptr[2] = key->ptr[3] = NULL;
+
+	if (block >= EXT2_NDIR_BLOCKS) {
+		block -= EXT2_NDIR_BLOCKS;
+
+		if (block >= addr_per_block) {
+			block -= addr_per_block;
+
+			if (block >= addr_per_block * addr_per_block) {
+				block -= addr_per_block * addr_per_block;
+
+				key->off[0] = EXT2_TIND_BLOCK;
+				key->off[1] = (block / (addr_per_block * addr_per_block));
+				key->off[2] =
+				    (block % (addr_per_block * addr_per_block)) /
+				    addr_per_block;
+				key->off[3] = (block % addr_per_block);
+				level = 3;
+			} else {
+				key->off[0] = EXT2_DIND_BLOCK;
+				key->off[1] = block / addr_per_block;
+				key->off[2] = block % addr_per_block;
+				level = 2;
+			}
+		} else {
+			key->off[0] = EXT2_IND_BLOCK;
+			key->off[1] = block;
+			level = 1;
+		}
+	} else {
+		key->off[0] = block;
+		level = 0;
+	}
+
+	/*
+	 * In the second step, we load the needed buffers.
+	 */
+
+	key->level = level;
+	key->inode = inode;
+
+	key->ptr[0] = (u32 *) (&(EXT2_I(inode)->i_data));
+
+	for (x = 1; x <= level; x++) {
+		u32 *ptr;
+
+		ptr = key->ptr[x - 1];
+		if (ptr == NULL)
+			break;
+/* Paul Whittaker tweak 19 Feb 2005 */
+		block = le32_to_cpu(ptr[key->off[x - 1]]);
+		if (block == 0)
+			continue;	// TLL 05/01/07
+		if ((key->ibh[x] = __bread(inode->i_sb->s_bdev,
+					   block, inode->i_sb->s_blocksize))
+		    == NULL)
+			goto error;
+		key->ptr[x] = (u32 *) (key->ibh[x]->b_data);
+	}
+
+	return 1;
+      error:
+	for (; x != 0; x--)
+		if (key->ibh[x] != NULL)
+			brelse(key->ibh[x]);
+	return 0;
+}
+
+
+/*
+ * Find the block for a given key.  Return 0 if there
+ * is no block for this key.
+ */
+static inline u32 ext2_get_key_blkaddr(struct ext2_bkey *key)
+{
+	assert(key->inode);
+	assert(atomic_read(&(key->inode)->i_mutex.count) <= 0);
+
+/* Paul Whittaker tweak 19 Feb 2005 */
+	if (key->ptr[key->level] == NULL)
+		return 0;
+	return le32_to_cpu(key->ptr[key->level][key->off[key->level]]);
+}
+
+
+/*
+ * Change the block for a given key.  Return 0 on success,
+ * -errno on failure.
+ */
+static inline int ext2_set_key_blkaddr(struct ext2_bkey *key, u32 blkaddr)
+{
+	char bdn[BDEVNAME_SIZE];
+	assert(key->inode);
+	assert(atomic_read(&(key->inode)->i_mutex.count) <= 0);
+
+	if (key->ptr[key->level] == NULL) {
+		/* The reason that this "can't happen" is that this
+		   routine is only used to shuffle block numbers or by
+		   free_cluster_blocks.  Cluster sizes are such that
+		   clusters can't straddle address blocks.  So the
+		   indirect block address can't be zero.  AFAIK, ptr
+		   can only be NULL on error or on null indirect block
+		   address.  Hmm, come to think of it, I think there
+		   are still some callers that don't check for errors
+		   from ext2_get_key(), so this still can happen until
+		   those are fixed up. */
+		printk(KERN_ERR
+		       "ext2_set_key_blkaddr: can't happen: NULL parent.  "
+		       "dev=%s, ino=%lu, level=%u.\n",
+		       bdevname(key->inode->i_sb->s_bdev, bdn),
+		       key->inode->i_ino, key->level);
+		return -ENOSYS;
+	}
+	/* Paul Whittaker tweak 19 Feb 2005 */
+	key->ptr[key->level][key->off[key->level]] = le32_to_cpu(blkaddr);
+	if (key->level > 0)
+		mark_buffer_dirty(key->ibh[key->level]);
+	return 0;
+}
+
+
+/*
+ * Increment the key.  Returns 0 if we go beyond the limits,
+ * 1 otherwise.
+ *
+ * Precondition: -key->off[level] <= incr < addr_per_block.
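+ *
+ * E.g. incrementing a key at the last direct block (level == 0,
+ * off[0] == 11) by 1 carries it into the indirect tree: off[0]
+ * becomes EXT2_IND_BLOCK, off[1] == 0, and level == 1.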
+ */
+static int ext2_next_key(struct ext2_bkey *key, int incr)
+{
+	int addr_per_block = EXT2_ADDR_PER_BLOCK(key->inode->i_sb);
+	int x, level = key->level;
+	u32 tmp;
+
+	assert(key->inode);
+	assert(atomic_read(&(key->inode)->i_mutex.count) <= 0);
+
+
+	/*
+	 * Increment the key.  This is done in two steps: first
+	 * adjust the off array, then reload buffers that should
+	 * be reloaded (we assume level > 0).
+	 */
+
+	assert(key->off[level] >= -incr);
+	assert(incr < addr_per_block);
+	key->block += incr;
+	key->off[level] += incr;
+
+	/*
+	 * First step: should be thought of as the propagation
+	 * of a carry.
+	 */
+
+	if (level == 0) {
+		if (key->off[0] >= EXT2_NDIR_BLOCKS) {
+			key->off[1] = key->off[0] - EXT2_NDIR_BLOCKS;
+			key->off[0] = EXT2_IND_BLOCK;
+			level = 1;
+		}
+		x = 0;
+	} else {
+		for (x = level; x > 0; x--) {
+			if (key->off[x] >= addr_per_block) {
+				key->off[x] -= addr_per_block;
+				key->off[x - 1]++;
+
+				if (x == 1) {
+					if (++level < 4) {
+						key->off[level] = key->off[level - 1];
+						key->off[level - 1] = 0;
+					} else
+						return 0;
+				}
+			} else
+				break;
+		}
+	}
+
+	/*
+	 * Second step: reload the buffers that have changed.
+	 */
+
+	key->level = level;
+
+	CHECK_NOT_ATOMIC
+	while (x++ < level) {
+		if (key->ibh[x] != NULL) {
+			if (IS_SYNC(key->inode) && buffer_dirty(key->ibh[x])) {
+				//mw:
+				assert(buffer_mapped(key->ibh[x])
+				       && (key->ibh[x]->b_bdev != NULL));
+				ll_rw_block(WRITE, 1, &(key->ibh[x]));
+				wait_on_buffer(key->ibh[x]);
+			}
+			brelse(key->ibh[x]);
+		}
+/* Paul Whittaker tweak 19 Feb 2005 */
+		if ((key->ptr[x - 1] != NULL)
+		    && ((tmp = le32_to_cpu(key->ptr[x - 1][key->off[x - 1]])) !=
+			0)) {
+			if ((key->ibh[x] =
+			     __bread(key->inode->i_sb->s_bdev, tmp,
+				     key->inode->i_sb->s_blocksize))
+			    != NULL)
+				key->ptr[x] = (u32 *) (key->ibh[x]->b_data);
+			else
+				key->ptr[x] = NULL;
+		} else {
+			key->ibh[x] = NULL;
+			key->ptr[x] = NULL;
+		}
+	}
+
+	return 1;
+}
+
+
+/* Method to free the key: just release buffers.
+
+   Returns 0 on success, -errno on error.
+*/
+
+static int ext2_free_key(struct ext2_bkey *key)
+{
+	int x, n;
+	struct buffer_head *bh[4];
+
+	assert(key->inode);
+	assert(atomic_read(&(key->inode)->i_mutex.count) <= 0);
+
+
+	for (x = 0, n = 0; x <= key->level; x++) {
+		if (key->ibh[x] != NULL) {
+			if (IS_SYNC(key->inode) && buffer_dirty(key->ibh[x]))
+				bh[n++] = key->ibh[x];
+			else
+				brelse(key->ibh[x]);
+		}
+	}
+
+	if (n > 0) {
+		int ncopy = n;
+		while (ncopy-- > 0) {
+			assert(buffer_mapped(bh[ncopy])
+			       && (bh[ncopy]->b_bdev != NULL));
+		}
+
+		ll_rw_block(WRITE, n, bh);
+
+		CHECK_NOT_ATOMIC
+
+		while (n-- > 0) {
+			wait_on_buffer(bh[n]);
+			/* TODO: Check for error. */
+			brelse(bh[n]);
+		}
+	}
+	return 0;
+}
+
+
+/* Returns positive if specified cluster is compressed,
+   zero if not,
+   -errno if an error occurred.
+
+   If you need the result to be accurate, then down i_sem before
+   calling this, and don't raise i_sem until after you've used the
+   result. */
+int ext2_cluster_is_compressed_fn(struct inode *inode, unsigned cluster)
+{
+	unsigned block = (ext2_cluster_block0(inode, cluster)
+			  + ext2_cluster_nblocks(inode, cluster)
+			  - 1);
+	struct ext2_bkey key;
+	int result;
+
+	assert(atomic_read(&inode->i_mutex.count) <= 0);
+
+	/* impl: Not all callers of ext2_cluster_is_compressed_fn() have
+	   i_sem down.  Of course it is impossible to guarantee
+	   up-to-date information for such callers (someone may
+	   compress or decompress between when we check and when they
+	   use the information), so hopefully it won't matter if the
+	   information we return is slightly inaccurate (e.g.
because
+	   someone is de/compressing the cluster while we check).  */
+	if (!ext2_get_key(&key, inode, block))
+		return -EIO;
+
+	result = (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR);
+	ext2_free_key(&key);
+	return result;
+}
+
+
+/* Support for the GETCOMPRRATIO ioctl() call.  We calculate how many
+   blocks the file would hold if it weren't compressed.  This requires
+   reading the cluster head for every compressed cluster.
+
+   Returns either a negative errno (-EIO, -EAGAIN or -EXT2_ECOMPR) or
+   the number of blocks that the file would take up if uncompressed. */
+int ext2_count_blocks(struct inode *inode)
+{
+	struct buffer_head *head_bh;
+	int count;
+	int cluster;
+	struct ext2_bkey key;
+	u32 end_blknr;
+
+	if (!(EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL))
+		return inode->i_blocks;
+
+	mutex_lock(&inode->i_mutex);
+	end_blknr = ROUNDUP_RSHIFT(inode->i_size,
+				   inode->i_sb->s_blocksize_bits);
+
+	/* inode->i_blocks is stored in units of 512-byte blocks.  It's
+	   more convenient for us to work in units of s_blocksize. */
+	{
+		u32 shift = inode->i_sb->s_blocksize_bits - 9;
+
+		count = inode->i_blocks;
+		if (count & ((1 << shift) - 1))
+			ext2_msg(inode->i_sb,
+				   "ext2_count_blocks",
+				   "i_blocks not multiple of blocksize");
+		count >>= shift;
+	}
+
+	cluster = 0;
+	if (!ext2_get_key(&key, inode, 0)) {
+		count = -EIO;
+		goto out;
+	}
+	while (key.block < end_blknr) {
+		u32 head_blkaddr = ext2_get_key_blkaddr(&key);
+
+		/* bug fix: init head_bh for each iteration TLL 2/21/07 */
+		head_bh = NULL;
+		if (head_blkaddr == EXT2_COMPRESSED_BLKADDR) {
+			count = -EXT2_ECOMPR;
+			break;
+		}
+		if (!ext2_next_key(&key, ext2_cluster_nblocks(inode, cluster) - 1))
+			break;
+		if (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR) {
+			struct ext2_cluster_head *head;
+
+			if (head_blkaddr == 0) {
+				count = -EXT2_ECOMPR;
+				break;
+			}
+			head_bh = __getblk(inode->i_sb->s_bdev,
+					   head_blkaddr, inode->i_sb->s_blocksize);
+			if (head_bh == NULL) {
+				/* Hmm, EAGAIN or EIO? */
+				count = -EAGAIN;
+				break;
+			}
+			if (!buffer_uptodate(head_bh))
+				ll_rw_block(READ, 1, &head_bh);
+
+			CHECK_NOT_ATOMIC
+
+			wait_on_buffer(head_bh);
+
+#ifdef CONFIG_HIGHMEM
+			if (!page_address(head_bh->b_page)) {
+				BUG();
+			}
+#endif
+
+			head = (struct ext2_cluster_head *) head_bh->b_data;
+			/* remove clen > ulen test TLL 2/21/07 */
+			if ((head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X))
+			    || (le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES)
+			    || (head->holemap_nbytes > 4)) {
+				count = -EXT2_ECOMPR;
+				break;
+			}
+			assert(sizeof(struct ext2_cluster_head) == 16);
+			count += (ROUNDUP_RSHIFT(le32_to_cpu(head->ulen),
+						 inode->i_sb->s_blocksize_bits)
+				  - ROUNDUP_RSHIFT((le32_to_cpu(head->clen)
+						    + sizeof(struct ext2_cluster_head)
+						    + head->holemap_nbytes),
+						   inode->i_sb->s_blocksize_bits));
+			brelse(head_bh);
+			head_bh = NULL;
+		}
+
+		if (!ext2_next_key(&key, 1))
+			break;
+		cluster++;
+	}
+	ext2_free_key(&key);
+	if (head_bh != NULL)
+		brelse(head_bh);
+      out:
+	mutex_unlock(&inode->i_mutex);
+	if (count == -EXT2_ECOMPR) {
+		ext2_msg(inode->i_sb,
+			   "ext2_count_blocks",
+			   "invalid compressed cluster %u of inode %lu",
+			   cluster, inode->i_ino);
+		EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL;
+	}
+
+	/* The count should be in units of 512 (i.e. 1 << 9) bytes. */
+	if (count >= 0)
+		count <<= inode->i_sb->s_blocksize_bits - 9;
+	return count;
+}
+
+
+/* Decompress some blocks previously obtained from a cluster.
+   Decompressed data is stored in ext2_rd_wa.u.  Buffer heads in the bh
+   array are packed together at the beginning of the array.  The ulen
The ulen + argument is an indication of how many bytes the caller wants to + obtain, excluding holes. (This can be less than head->ulen, as in the + case of readpage.) No hole processing is done; we don't even look at + head->holemap. + + Note the semantic difference between this and + (): the latter decompresses a cluster _and + stores it as such_, whereas ext2_decompress_blocks() just + decompresses the contents of the blocks into ext2_rd_wa.u. + + The working area is supposed to be available and locked. + + Returns a negative value on failure, the number of bytes + decompressed otherwise. + + Called by : + + ext2_decompress_cluster () [sem down] + ext2_readpage () [sem down, but only ifndef EXT2_LOCK_BUFFERS] */ + +/* TODO: ext2_decompress_blocks() scribbles in ext2_rd_wa.c. + Check callers to make sure this isn't a problem. */ + +/* mw: caller must already have done: "get_cpu_var(ext2_rd_wa)" */ +size_t +ext2_decompress_blocks(struct inode * inode, + struct buffer_head ** bh, + int nblk, size_t ulen, u32 cluster) +{ + struct ext2_cluster_head *head; + int count, src_ix, x; + unsigned char *dst; + unsigned meth, alg; + char bdn[BDEVNAME_SIZE]; + +#ifdef EXT2_COMPR_DEBUG + assert(in_atomic()); + assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ +#endif + + /* + We pack the buffer together before (and must take care + not to duplicate the buffer heads in the array). + + pjm 1998-01-09: Starting from e2compr-0.4.0, they should + already be packed together in the blkaddr array. TODO: + Insert appropriate assert() statements checking tht this is + the case. TODO: Check that callers have bh[] packed. */ +#ifdef EXT2_COMPR_REPORT + trace_e2c("ext2_decompress_blocks: nblk=%d\n", nblk); +#endif + for (src_ix = 0, x = 0; src_ix < nblk; src_ix++) { + if (bh[src_ix] == NULL) + printk("no_bheader()\n"); + if ((bh[src_ix] != NULL) && (bh[src_ix]->b_blocknr != 0)) { + + if (x < src_ix) { + ext2_msg(inode->i_sb, "bad buffer table", + "inode = %lu", inode->i_ino); + goto error; + } + x++; + } + } + + nblk = x; +#ifdef EXT2_COMPR_REPORT_CPR + trace_e2c("ext2_decompress_blocks (2): nblk=%d\n", nblk); +#endif + if (nblk == 0) { + ext2_msg(inode->i_sb, "no block in cluster", "inode = %lu", + inode->i_ino); + goto error; + } + + restore_b_data_himem(bh[0]); + head = (struct ext2_cluster_head *) (bh[0]->b_data); + + /* + * Do some consistency checks. + */ + + if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) { + ext2_msg(inode->i_sb, + "bad magic number", + "inode = %lu, magic = %#04x", + inode->i_ino, le16_to_cpu(head->magic)); + goto error; + } +#if EXT2_GRAIN_SIZE & (EXT2_GRAIN_SIZE - 1) +# error "This code assumes EXT2_GRAIN_SIZE to be a power of two." +#endif + /* The macro also assumes that _a > 0, _b > 0. */ +#define ROUNDUP_GE(_a, _b, _d) ( ( ((_a) - 1) \ + | ((_d) - 1)) \ + >= ( ((_b) - 1) \ + | ((_d) - 1))) + + //mw: following 3 just for debugging!!! 
+ assert(!((le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES))); + assert(!((head->clen == 0))); + assert(!(ROUNDUP_GE(le32_to_cpu(head->clen) + + head->holemap_nbytes + sizeof(struct ext2_cluster_head), + le32_to_cpu(head->ulen), EXT2_GRAIN_SIZE))); + + if ((le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES) + || (head->clen == 0) + || ROUNDUP_GE(le32_to_cpu(head->clen) + + head->holemap_nbytes + + sizeof(struct ext2_cluster_head), + le32_to_cpu(head->ulen), EXT2_GRAIN_SIZE)) { + ext2_msg(inode->i_sb, + "invalid cluster len", + "inode = %lu, len = %u:%u", + inode->i_ino, + le32_to_cpu(head->clen), le32_to_cpu(head->ulen)); + goto error; + } +#undef ROUNDUP_GE + + /* TODO: Test for `nblk != 1 + ...' instead of the current + one-sided test. However, first look at callers, and make + sure that they handle the situation properly (e.g. freeing + unneeded blocks) and tht they always pass a correct + value for nblk. */ + if (nblk <= ((le32_to_cpu(head->clen) + + head->holemap_nbytes + sizeof(struct ext2_cluster_head) + - 1) + / bh[0]->b_size)) { + int i; + ext2_msg(inode->i_sb, + "missing blocks", + "inode = %lu, blocks = %d/%u", + inode->i_ino, nblk, ((le32_to_cpu(head->clen) + + head->holemap_nbytes + + sizeof(struct ext2_cluster_head) + - 1) + / bh[0]->b_size) + 1); + printk("i_size=%d\n", (int) inode->i_size); + for (i = 0; i < 12; i++) + printk("i_data[%d]=%d\n", i, EXT2_I(inode)->i_data[i]); + printk("cluster_head (sizeof head=%u):\n\tmagic=0x%4x\n\tmethod=%d\n\t \ + holemap_nbytes=%d\n\tulen=%d\n\tclen=%d\n\tbh->b_size=%zu\n", + sizeof(struct ext2_cluster_head), head->magic, + (int) head->method, (int) head->holemap_nbytes, head->ulen, + head->clen, bh[0]->b_size); + goto error; + } + + /* I moved it here in case we need to load a module that + * needs more heap that is currently allocated. + * In such case "init_module" for that algorithm forces + * re-allocation of ext2_wa. It should be safe here b/c the + * first reference to ext2_wa comes just after and we have + * locked ext2_wa before. + * + * FIXME: Totally separate working areas for reading and writing. + * Jan R. + */ + meth = head->method; /* only a byte, so no swabbing needed. */ + if (meth >= EXT2_N_METHODS) { + ext2_msg(inode->i_sb, + "Ass: illegal method id", + "inode = %lu, id = %u", inode->i_ino, meth); + dump_stack(); + goto error; + } + alg = ext2_method_table[meth].alg; + + /* + * Adjust the length if too many bytes are requested. + * + * TODO: Traiter les bitmaps ici, et non plus au niveau de + * l'appelant. Faire un petit cache en memorisant le + * numero du dernier noeud decompresse et du dernier + * cluster. Le pb, c'est qu'on ne peut pas savoir si + * les blocs ont ete liberes et realloue entre temps + * -> il faut etre prevenu pour invalider le buffer. + * + * pjm fixme tr: Take care of the bitmaps here, + * instead of by the caller as we currently do. Keep + * a small cache that holds the number of the + * previous to have been + * decompressed. The problem is that we have no way + * of knowing whether the blocks have been freed and + * reallocated in the meantime / since last time -> + * we must be informed so that we can invalidate the + * buffer. 
*/ + if (ulen > le32_to_cpu(head->ulen)) { + memset(__get_cpu_var(ext2_rd_wa)->u + le32_to_cpu(head->ulen), 0, ulen - le32_to_cpu(head->ulen)); + ulen = le32_to_cpu(head->ulen); + + assert((bh[0]->b_size & (bh[nblk - 1]->b_size - 1)) == 0); + if (((le32_to_cpu(head->clen) + + head->holemap_nbytes + sizeof(struct ext2_cluster_head) + - 1) + | (bh[0]->b_size - 1)) + >= ((ulen - 1) | (bh[0]->b_size - 1))) { + printk(KERN_WARNING + "ext2_decompress_blocks: " + "ulen (=%zu) or clen (=%u) wrong " + "in dev %s, inode %lu.\n", + ulen, le32_to_cpu(head->clen), + bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino); + goto error; + } + } + + /* + * Now, decompress data. + */ + /* TODO: Is this (ulen == 0) possible? */ + if (ulen == 0) + return 0; + + for (x = 0, dst = __get_cpu_var(ext2_rd_wa)->c; x < nblk; dst += bh[x++]->b_size) { + restore_b_data_himem(bh[x]); + memcpy(dst, bh[x]->b_data, bh[x]->b_size); + } + + + if (!ext2_algorithm_table[alg].avail) { + ext2_msg(inode->i_sb, + "ext2_decompress_blocks", + "algorithm `%s' not available for inode %lu", + ext2_algorithm_table[alg].name, inode->i_ino); + ext2_mark_algorithm_use(inode, alg); + goto error; + } + + +#ifdef EXT2_COMPR_DEBUG + { + struct ext2_cluster_head *wa1head = (struct ext2_cluster_head *) __get_cpu_var(ext2_rd_wa)->c; + unsigned clen = le32_to_cpu(wa1head->clen); + if (wa1head->checksum != + cpu_to_le32(ext2_adler32 + (le32_to_cpu(*(u32 *) __get_cpu_var(ext2_rd_wa)->c), + __get_cpu_var(ext2_rd_wa)->c + 8, + (sizeof(struct ext2_cluster_head) - 8 + + head->holemap_nbytes + clen)))) + { + head->checksum = cpu_to_le32(0); + ext2_msg(inode->i_sb, "ext2_decompress_blocks: corrupted compressed data ", + "in inode %lu", inode->i_ino); + //goto error; + //mw: we try to go on. if data is corrupt we will get an compression error anyway. + } + } +#endif + + count = ext2_algorithm_table[alg].decompress(__get_cpu_var(ext2_rd_wa)->c + + sizeof(struct + ext2_cluster_head) + + head->holemap_nbytes, + __get_cpu_var(ext2_rd_wa)->u, + __get_cpu_var(ext2_rd_wa)->heap, + le32_to_cpu(head->clen), ulen, + ext2_method_table[meth].xarg); + + /* If we got fewer than ulen bytes, there is a problem, since + we corrected the ulen value before decompressing. Note + that it's OK for count to exceed ulen, because ulen can be + less than head->ulen. */ + if ((count < ulen) || (count != le32_to_cpu(head->ulen))) { + ext2_msg(inode->i_sb, + "ext2_decompress_blocks: corrupted compressed data ", "inode = %lu, count = %u of %zu (%u/%u)", + inode->i_ino, count, ulen, le32_to_cpu(head->clen), le32_to_cpu(head->ulen)); + goto error; + } + ext2_ensure_algorithm_use(inode, alg); + return count; + + error: + + /* Raise the ECOMPR flag for this file. What this means is + that the file cannot be written to, and can only be read if + the user raises the NOCOMPR flag. + + pjm 1997-01-16: I've changed it so that files with ECOMPR + still have read permission, so user can still read the rest + of the file but get an I/O error (errno = EXT2_ECOMPR) when + they try to access anything from this cluster. */ + + EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; + + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty_sync(inode); + /* pjm 1998-02-21: We used to do `memset(ext2_rd_wa.u, 0, ulen)' + here because once upon a time the user could sometimes see + buf contents. I believe that this can never happen any + more. 
*/
+	return -EXT2_ECOMPR;
+}
+
+
+/* ext2_calc_free_ix: Calculates the position of the C_NBLK'th non-hole
+   block; equals C_NBLK plus the number of holes in the first CALC_FREE_IX()
+   block positions of the cluster.
+
+   pre: 1 =< c_nblk < EXT2_MAX_CLUSTER_BLOCKS,
+	Number of 1 bits in ,ubitmap` > ,c_nblk`.
+   post: c_nblk =< calc_free_ix() < EXT2_MAX_CLUSTER_BLOCKS
+
+   Called by:
+	ext2_decompress_cluster()
+	ext2_file_write()
+
+   TODO: Have ext2_compress_cluster() call this.
+ */
+unsigned ext2_calc_free_ix(unsigned holemap_nbytes, u8 const *holemap,
+			   unsigned c_nblk)
+{
+	unsigned i;
+
+	assert(1 <= c_nblk);
+	assert(c_nblk < EXT2_MAX_CLUSTER_BLOCKS);
+	for (i = 0; (i < holemap_nbytes * 8) && (c_nblk > 0);) {
+		assert(i < EXT2_MAX_CLUSTER_BLOCKS - 1);
+		if ((holemap[i >> 3] & (1 << (i & 7))) == 0)
+			c_nblk--;
+		i++;
+	}
+	i += c_nblk;
+	assert(i < EXT2_MAX_CLUSTER_BLOCKS);
+	return i;
+}
+
+
+/* ext2_unpack_blkaddrs(): Prepare the blkaddr[] array for
+   decompression by moving non-hole blocks to their proper positions
+   (according to ubitmap) and zeroing any other blocks.
+
+   Returns 0 on success, -errno on error.
+
+   Note: We assume that blkaddr[i] won't change under us for all
+   clu_block0 =< i < clu_block0 + clu_nblocks.  Holding i_sem should
+   guarantee this.
+
+   Called by:
+	ext2_decompress_cluster()
+	ext2_file_write() */
+int
+ext2_unpack_blkaddrs(struct inode *inode,
+		     struct buffer_head *bh[],
+		     int mmcp,
+		     unsigned holemap_nbytes,
+		     u8 const *holemap,
+		     unsigned c_nblk,
+		     unsigned free_ix,
+		     unsigned clu_block0, unsigned clu_nblocks)
+{
+	struct ext2_bkey key;
+	u32 *blkaddr;
+	unsigned si, di;
+
+	assert(clu_nblocks <= EXT2_MAX_CLUSTER_BLOCKS);
+	assert(1 <= c_nblk);
+	assert(c_nblk <= free_ix);
+	assert(free_ix < EXT2_MAX_CLUSTER_BLOCKS);
+	if (!ext2_get_key(&key, inode, clu_block0))
+		return -EIO;
+
+	if (key.ptr[key.level] == NULL) {
+		/* TODO: Call ext2_error(). */
+		ext2_free_key(&key);
+		return -EIO;
+	}
+
+	/* impl: Note that we're relying on clusters not straddling
+	   address block boundaries. */
+	blkaddr = &key.ptr[key.level][key.off[key.level]];
+	memset(blkaddr + free_ix,
+	       0, sizeof(*blkaddr) * (clu_nblocks - free_ix));
+	si = c_nblk;
+	for (di = free_ix; di > si;) {
+		--di;
+		if (((di >> 3) < holemap_nbytes)
+		    && (holemap[di >> 3] & (1 << (di & 7)))) {
+			blkaddr[di] = 0;
+			bh[di]->b_blocknr = 0;
+			clear_bit(BH_Mapped, &bh[di]->b_state);
+		} else {
+			if (si == 0) {
+				break;
+			}
+			blkaddr[di] = blkaddr[--si];
+			assert(bh[di]->b_blocknr == 0);
+			assert(bh[si]->b_blocknr != 0);
+			assert(buffer_mapped(bh[si]));
+#ifdef EXT2_COMPR_REPORT_CPR
+			trace_e2c("unpack: di=%d sts=0x%x si=%d blk=%ld sts=0x%x\n",
+				  di, (int) bh[di]->b_state, si, bh[si]->b_blocknr,
+				  (int) bh[si]->b_state);
+#endif
+			bh[di]->b_blocknr = bh[si]->b_blocknr;
+			set_bit(BH_Mapped, &bh[di]->b_state);
+			bh[si]->b_blocknr = 0;
+			clear_bit(BH_Mapped, &bh[si]->b_state);
+			set_bit(BH_Uptodate, &bh[di]->b_state);
+			if (mmcp) {
+				restore_b_data_himem(bh[si]);
+				restore_b_data_himem(bh[di]);
+				memcpy(bh[di]->b_data, bh[si]->b_data,
+				       inode->i_sb->s_blocksize);
+			}
+		}
+	}
+	if (key.level > 0)
+		mark_buffer_dirty(key.ibh[key.level]);
+	return ext2_free_key(&key);
+}
+
+
+/*
+ * Decompress one cluster.  If the cluster is actually compressed, it
+ * is decompressed in place, and the compress bitmap is updated.
+ *
+ * Returns the size of decompressed data on success, a negative
+ * value in case of failure, or 0 if the cluster was not compressed.
+ *
+ * The inode is supposed to be writable.
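+ * Decompression happens in place: the compressed blocks keep their
+ * block numbers (shuffled to their uncompressed positions by
+ * ext2_unpack_blkaddrs()), and extra blocks are allocated only for
+ * the data that no longer fits.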
+ * + * Called by : + * + * ext2_decompress_inode() [sem down] + * ext2_file_write() [sem down] + * trunc_bitmap() [sem down] + */ +int ext2_decompress_cluster(struct inode *inode, u32 cluster) +{ + struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS]; + struct buffer_head *bhc[EXT2_MAX_CLUSTER_BLOCKS]; + struct page *pg[EXT2_MAX_CLUSTER_PAGES], *epg[EXT2_MAX_CLUSTER_PAGES]; + int result, nbh; + unsigned npg, c_nblk; + struct ext2_cluster_head *head; + int i = 0; + unsigned free_ix, clu_block0, clu_nblocks; + int d_npg = -1; /* number of decompressed page */ + unsigned long allpagesuptodate = 1; + struct buffer_head *bh_writeout[EXT2_MAX_CLUSTER_BLOCKS]; + int bhn_writeout; +#ifdef CONFIG_HIGHMEM + int kmapped = 0; +#endif + + for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++) { + bh_writeout[i] = NULL; + bhn_writeout = 0; + } + + assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) + epg[i] = NULL; + + /* + Get blocks from cluster. + Assign to variables head, ubitmap, clu_block0, clu_nblocks. + Shuffle blkaddr[] array and write zero to holes. + Allocate new blocks. + Get the working area. + Decompress. + Copy to bh[]->b_data (marking buffers uptodate and dirty). + Release working area. + Release bh[]. + */ + + nbh = 0; + npg = ext2_cluster_npages(inode, cluster); + result = ext2_get_cluster_pages(inode, cluster, pg, NULL, 0); + if (result <= 0) { + for (i = 0; i < npg; i++) + epg[i] = NULL; + goto out_err; + } + + for (i = 0; i < npg; i++) { + if ((pg[i]->index <= ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) && + !PageUptodate(pg[i])) { + allpagesuptodate = 0; + } + } + if (allpagesuptodate) { + //printk("DecompressPages: Ino:%lu\n", inode->i_ino); + result = ext2_decompress_pages(inode, cluster, pg); + if (result != 0) { + for (i = 0; i < npg; i++) + epg[i] = NULL; + if (result > 0) + goto cleanup; + else + goto out_err; + } + /*mw: if we continue here then in ext2_decompress_pages + * not all pages were up-to-date + */ + } + //printk("DecompressCluster: Ino:%lu\n", inode->i_ino); + result = ext2_get_cluster_extra_pages(inode, cluster, pg, epg); + if (result <= 0) { + goto out_err; + } +#ifdef CONFIG_HIGHMEM + ext2_kmap_cluster_pages(NULL, pg, epg); + kmapped = 1; +#endif + + result = ext2_get_cluster_blocks(inode, cluster, bh, pg, epg, 0); + if (result <= 0) { + goto out_err; + } + nbh = c_nblk = result; + + +#ifdef EXT2_COMPR_REPORT + { + int j; + printk + (" > > > ext2_decompress_cluster %d: inode=%ld, size=%d nbh=%d\n", + cluster, inode->i_ino, (int) inode->i_size, nbh); +#ifdef EXT2_COMPR_REPORT_VERBOSE + for (j = 0; j < nbh; j++) { + if (bh[j]) { + printk("0buffer_head[%d]: blocknr=%lu, addr=%p \n", j, + (unsigned long) bh[j]->b_blocknr, bh[j]); + if (bh[j]->b_page) + printk("0:[page->index=%ld]\n", bh[j]->b_page->index); + else + printk("[No page]\n"); + } else + printk("buffer_head[%d] is NULL\n", j); + } + while ((j < EXT2_MAX_CLUSTER_BLOCKS) && (bh[j] != NULL) && bh[j]->b_blocknr) { /*Add by Yabo Ding */ + printk + ("buffer_head[%d] is free but not NULL: blocknr=%lu, addr=%p\n", + j, (unsigned long) bh[j]->b_blocknr, bh[j]); + j++; + } +#endif + } +#endif + for (i = 0; i < nbh; i++) + assert(bh[i]->b_blocknr != 0); + + restore_b_data_himem(bh[0]); + + head = (struct ext2_cluster_head *) bh[0]->b_data; + if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) { + ext2_msg(inode->i_sb, + "ext2_decompress_cluster: bad magic number", + "cluster %d: inode = %lu, magic = %#04x", + cluster, inode->i_ino, 
le16_to_cpu(head->magic)); + EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; + result = -EXT2_ECOMPR; + goto out_err; + } + if (le32_to_cpu(head->ulen) - + (c_nblk << inode->i_sb->s_blocksize_bits) <= 0) { + ext2_error(inode->i_sb, "ext2_decompress_cluster", + "ulen too small for c_nblk. ulen=%u, c_nblk=%u, bs=%lu", + le32_to_cpu(head->ulen), c_nblk, + inode->i_sb->s_blocksize); + EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; + result = -EXT2_ECOMPR; + goto out_err; + } + free_ix = + ext2_calc_free_ix(head->holemap_nbytes, (u8 const *) (&head[1]), + c_nblk); + clu_block0 = ext2_cluster_block0(inode, cluster); + clu_nblocks = ext2_cluster_nblocks(inode, cluster); + ext2_unpack_blkaddrs(inode, bh, 1, + head->holemap_nbytes, (u8 const *) (&head[1]), + c_nblk, free_ix, clu_block0, clu_nblocks); + + /* Allocate the extra blocks needed. */ + { + int data_left = le32_to_cpu(head->ulen); + + data_left -= c_nblk << inode->i_sb->s_blocksize_bits; + assert(data_left > 0); + for (i = free_ix; i < clu_nblocks; i++) + if (((i >> 3) >= head->holemap_nbytes) + || !(head->holemap[i >> 3] & (1 << (i & 7)))) { + result = ext2_get_block(inode, + clu_block0 + i, + bh[i], 1 /* create */ ); + if (bh[i]->b_blocknr == 0) + goto out_err; + d_npg = + (i >> + (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) + + 1; + nbh++; + data_left -= inode->i_sb->s_blocksize; + if (data_left <= 0) + break; + } + } + + /* jmr 1998-10-28 Hope this is the last time I'm moving this code. + * Module loading must be done _before_ we lock wa, just think what + * can happen if we reallocate wa when somebody else uses it... + */ + { + unsigned meth; +#ifdef CONFIG_KMOD + unsigned alg; +#endif + + meth = head->method; /* only a byte, so no swabbing needed. */ + if (meth >= EXT2_N_METHODS) { + ext2_msg(inode->i_sb, + "Ass.: illegal method id", + "inode = %lu, id = %u", inode->i_ino, meth); + result = -EXT2_ECOMPR; + goto out_err; + } +#ifdef CONFIG_KMOD + alg = ext2_method_table[meth].alg; + if (!ext2_algorithm_table[alg].avail) { + char str[32]; + + sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name); + request_module(str); + } +#endif + } + + result = -EINTR; + + /* + * Then, decompress and copy back data. + */ + { + int ic; + + for (ic = 0, i = 0; i < clu_nblocks; i++) { + if (bh[i]->b_blocknr != 0) { + bhc[ic] = bh[i]; + ic++; + if (ic == c_nblk) { + break; + } + } + } + } + + +#ifdef EXT2_COMPR_REPORT_WA + printk(KERN_DEBUG "pid %d locks wa\n", current->pid); +#endif + if (get_cpu_var(ext2_rd_wa) == NULL) + { + ext2_alloc_rd_wa(); + } + assert(__get_cpu_var(ext2_rd_wa) != NULL); + + result = ext2_decompress_blocks(inode, bhc, c_nblk, + le32_to_cpu(head->ulen), cluster); + if (result != (int) le32_to_cpu(head->ulen)) { + if (result >= 0) { + /* I think this is impossible, as + ext2_decompress_blocks() checks against + head->ulen. */ + printk(KERN_WARNING "Unexpected return value %d " + "from ext2_decompress_blocks()\n", result); + result = -EXT2_ECOMPR; + } + +#ifdef EXT2_COMPR_REPORT_WA + printk(KERN_DEBUG "pid %d unlocks wa\n", current->pid); +#endif + put_cpu_var(ext2_rd_wa); + goto out_err; + } + +#ifdef EXT2_COMPR_REPORT + printk(KERN_DEBUG "ext2: %04x:%lu: cluster %d+%d [%d] " + "decompressed into %d bytes\n", + inode->i_rdev, + inode->i_ino, clu_block0, clu_nblocks, c_nblk, result); +#endif + + /* Copy back decompressed data. 
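Blocks wholly below i_size are copied in full, the block containing end-of-file only up to i_size, and hole blocks (b_blocknr == 0) are zero-filled.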
*/ + { + int count = result; + unsigned char const *src; + int c, p; + int cbh; + int n; /* block index in page */ + struct buffer_head *bp; + unsigned addr0, b_start, b_end; + + assert(count > 0); + if (d_npg == -1) { + d_npg = ((count - 1) >> PAGE_CACHE_SHIFT) + 1; + } +#ifdef EXT2_COMPR_REPORT_CPR + trace_e2c + ("ext2_decompress_cluster: cnt=%d free_ix=%d d_npg=%d nbh=%d\n", + count, free_ix, d_npg, nbh); +#endif + result = -EXT2_ECOMPR; + src = __get_cpu_var(ext2_rd_wa)->u; + cbh = 0; + for (c = 0; c < clu_nblocks; c++) { + + if (bh[c]->b_blocknr == 0) { +#ifdef EXT2_COMPR_REPORT_CPR + trace_e2c("\t clear buf %d sts=0x%x\n", c, + (int) bh[c]->b_state); +#endif + restore_b_data_himem(bh[c]); + memset(bh[c]->b_data, 0, inode->i_sb->s_blocksize); + continue; + } + if (cbh >= (nbh - 1)) { + break; + } + if (count < inode->i_sb->s_blocksize) { + put_cpu_var(ext2_rd_wa); + goto out_err; + } + cbh++; + count -= inode->i_sb->s_blocksize; + p = c >> (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + if (!PageUptodate(pg[p])) { + addr0 = (clu_block0 << inode->i_sb->s_blocksize_bits); + b_start = addr0 + (c << inode->i_sb->s_blocksize_bits); + b_end = b_start + inode->i_sb->s_blocksize; +#ifdef EXT2_COMPR_REPORT_CPR + trace_e2c("\t[%d] sts=0x%x e=%d s=%d sz=%d pg:%lu(%#x)\n", + c, (int) bh[c]->b_state, b_end, b_start, + (int) inode->i_size, pg[p]->index, + (unsigned int) pg[p]); +#endif + if (b_end <= inode->i_size) { + /* Block is before end of file, copy data */ + restore_b_data_himem(bh[c]); + memcpy(bh[c]->b_data, src, inode->i_sb->s_blocksize); + + } else if (b_start < inode->i_size) { + /* Block contains end of file, copy to end */ + restore_b_data_himem(bh[c]); + memcpy(bh[c]->b_data, src, inode->i_size - b_start); + + } + set_buffer_uptodate(bh[c]); + set_buffer_dirty(bh[c]); + bh_writeout[bhn_writeout] = bh[c]; //mw + bhn_writeout++; //mw + } else { + //mw: DEBUG. buffer is uptodate now. compress will not reread! an get the compressed data!!! + // clear flag in extra page!!! 
+ // clear_bit(BH_Uptodate, &bh[c]->b_state); + + n = c & ((PAGE_CACHE_SIZE - 1) >> inode->i_sb-> + s_blocksize_bits); + bp = page_buffers(pg[p]); + for (i = 0; i < n; i++) { + bp = bp->b_this_page; + } + result = ext2_get_block(inode, clu_block0 + c, bp, 0); + + //mw: needed to do a writeback of the non-epg-buffers + //no idea how it was done before + set_buffer_uptodate(bp); + set_buffer_dirty(bp); + bh_writeout[bhn_writeout] = bp; //mw + bhn_writeout++; //mw + + if (bp->b_blocknr == 0) { + put_cpu_var(ext2_rd_wa); + goto out_err; + } + assert(bp->b_blocknr == bh[c]->b_blocknr); + } + src += inode->i_sb->s_blocksize; + } + if (count > inode->i_sb->s_blocksize) { + put_cpu_var(ext2_rd_wa); + goto out_err; + } + p = c >> (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + if (!PageUptodate(pg[p])) { + addr0 = (clu_block0 << inode->i_sb->s_blocksize_bits); + b_start = addr0 + (c << inode->i_sb->s_blocksize_bits); +#ifdef EXT2_COMPR_REPORT_CPR + trace_e2c("\t[%d] sts=0x%x c=%d s=%d sz=%d pg:%lu(%#x)\n", c, + (int) bh[c]->b_state, count, b_start, + (int) inode->i_size, pg[p]->index, + (unsigned int) pg[p]); +#endif + if (b_start >= inode->i_size) { + restore_b_data_himem(bh[c]); + memset(bh[c]->b_data, 0, inode->i_sb->s_blocksize); + + } else { + if ((inode->i_size - b_start) < count) { + restore_b_data_himem(bh[c]); + memcpy(bh[c]->b_data, src, inode->i_size - b_start); + memset(bh[c]->b_data + (inode->i_size - b_start), 0, + count - (inode->i_size - b_start)); + } else { + restore_b_data_himem(bh[c]); + memcpy(bh[c]->b_data, src, count); + } + } + set_buffer_uptodate(bh[c]); + set_buffer_dirty(bh[c]); + bh_writeout[bhn_writeout] = bh[c]; //mw + bhn_writeout++; //mw + } else { + assert(epg[p] != NULL); //mw + n = c & ((PAGE_CACHE_SIZE - 1) >> inode->i_sb-> + s_blocksize_bits); + bp = page_buffers(pg[p]); + for (i = 0; i < n; i++) { + bp = bp->b_this_page; + } + result = ext2_get_block(inode, clu_block0 + c, bp, 0); + + //mw: needed to do a writeback of the non-epg-buffers + //no idea how it was done before + set_buffer_uptodate(bp); + set_buffer_dirty(bp); + bh_writeout[bhn_writeout] = bp; //mw + bhn_writeout++; //mw + if (bp->b_blocknr == 0) { + put_cpu_var(ext2_rd_wa); + goto out_err; + } + assert(bp->b_blocknr == bh[c]->b_blocknr); + } + result = (nbh - 1) * inode->i_sb->s_blocksize + count; + } + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (pg[i] == NULL) + break; + if (i < d_npg) + SetPageUptodate(pg[i]); + } + +#ifdef EXT2_COMPR_REPORT_WA + printk(KERN_DEBUG "pid %d unlocks wa\n", current->pid); +#endif + put_cpu_var(ext2_rd_wa); + + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty_sync(inode); + /* If needed, EXT2_DIRTY_FL is raised by the caller. */ + +#if 0 + /* TODO: SYNC */ + if (IS_SYNC(inode)) { + generic_osync_inode(inode, inode->i_mapping, + OSYNC_METADATA | OSYNC_DATA); + } +#endif + assert(result >= 0); + + //Sync out changes: + assert(bhn_writeout <= EXT2_MAX_CLUSTER_BLOCKS); + assert(bhn_writeout >= 0); + + //mw: debug + for (i = 0; i < bhn_writeout; i++) { + if ((!buffer_mapped(bh_writeout[i])) + || (bh_writeout[i]->b_bdev == NULL)) { + u32 block = ext2_cluster_block0(inode, cluster); + ext2_get_block(inode, block + i, bh_writeout[i], 1); + //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh_writeout[i]->b_blocknr, (bh_writeout[i]->b_state & BH_Mapped), (bh_writeout[i]->b_page ? 
bh_writeout[i]->b_page->index : 0), bh_writeout[i]->b_bdev ); + } + assert(buffer_mapped(bh_writeout[i])); + assert(bh_writeout[i]->b_bdev != NULL); + assert(bh_writeout[i]->b_bdev == inode->i_sb->s_bdev); + /*if (bh_writeout[i]->b_bdev == NULL) + bh_writeout[i]->b_bdev = inode->i_sb->s_bdev; //fix bdev-bug */ + } + + ll_rw_block(WRITE, bhn_writeout, bh_writeout); + //mw: seems we have to wait here, otherwise: crash! + + CHECK_NOT_ATOMIC + for (i = 0; i < bhn_writeout; i++) { + if (bh_writeout[i]) + wait_on_buffer(bh_writeout[i]); + } + goto cleanup; + + out_err: + printk("Error in Decompressing cluster: Err=%i\n", result); + + cleanup: + +#ifdef CONFIG_HIGHMEM + if (kmapped) + ext2_kunmap_cluster_pages(NULL, pg, epg); +#endif + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (pg[i] == NULL) + break; + unlock_page(pg[i]); + page_cache_release(pg[i]); + } + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (epg[i] != NULL) { + ClearPageDirty(epg[i]); + ClearPageUptodate(epg[i]); + try_to_free_buffers(epg[i]); + unlock_page(epg[i]); + assert(page_count(epg[i]) == 1); + page_cache_release(epg[i]); + } + } + + /* + * Release buffers, don't forget to unlock the locked ones. + * pjm 1998-01-14: TO_DO: Locked ones? + */ + assert(nbh >= 0); + assert(nbh <= EXT2_MAX_CLUSTER_BLOCKS); +#ifdef EXT2_COMPR_REPORT + trace_e2c(" < < < ext2_decompress_cluster %d: inode=%ld, res=%i\n", + cluster, inode->i_ino, result); +#endif + return result; +} + + +/* + * Function to decompress the pages of a cluster. + * + * Allocate buffers to pages what are not mapped on the device. + * + * Returns the size of decompressed data on success, a negative + * value in case of failure, or 0 if some pages are not uptodate. + * + * The inode is supposed to be writable. + * All the pages must be UPTODATE, + */ +int ext2_decompress_pages(struct inode *inode, u32 cluster, + struct page *pg[]) +{ + struct ext2_cluster_head *head; + struct buffer_head *bh0; + struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS]; + unsigned nbh, c_nblk; + unsigned free_ix, clu_block0, clu_nblocks; + int i, pagesPerCluster, data_left, size = 0; + long status = 0; + char *dp; + struct buffer_head *bh_writeout[EXT2_MAX_CLUSTER_BLOCKS]; + int bhn_writeout; +#ifdef CONFIG_HIGHMEM + int kmapped = 0; + + ext2_kmap_cluster_pages(NULL, pg, NULL); + kmapped = 1; +#endif + + for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++) { + bh_writeout[i] = NULL; + bhn_writeout = 0; + } + + /* First, get cluster_head (For this, we need to re-read the first block of + the cluster, without overwriting the data of the page the buffer point to... */ + /* This suppose that cluster are aligned with PAGE_SIZE... To be improved */ + + /* Changed by Yabo Ding, + The old code cannot reread data from disk to a changed buffers data pointer in 2.6.x. + So, I copy memory data(decompressed) to a temporary buffer; + Then reread data(compressed) from disk, and copy to head; + Then copy back the memory data from temporary buffer. + It seems clumsy, but it works well. 
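In outline, the workaround reads (all names from the code that follows):

    memcpy(dp, bh0->b_data, bh0->b_size);    stash the decompressed page data
    clear_bit(BH_Uptodate, &bh0->b_state);   force a real read from disk
    ext2_get_block(inode, ext2_cluster_block0(inode, cluster), bh0, 0);
    ll_rw_block(READ, 1, &bh0);              fetch the compressed block
    wait_on_buffer(bh0);
    memcpy(head, bh0->b_data, bh0->b_size);  keep a private copy of the head
    memcpy(bh0->b_data, dp, bh0->b_size);    restore the decompressed data

so the page cache is left exactly as it was, while `head` ends up holding the on-disk cluster header.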
+ */ + + bh0 = page_buffers(pg[0]); + restore_b_data_himem(bh0); + + head = (struct ext2_cluster_head *) kmalloc(bh0->b_size, GFP_KERNEL); + if (head == NULL) { + ext2_msg(inode->i_sb, "no more memory", "inode = %lu", + inode->i_ino); + status = -EIO; + goto out_x; + } + dp = kmalloc(bh0->b_size, GFP_KERNEL); + if (dp == NULL) { + ext2_msg(inode->i_sb, "no more memory", "inode = %lu", + inode->i_ino); + kfree(head); + status = -EIO; + goto out_x; + } + memcpy(dp, bh0->b_data, bh0->b_size); + clear_bit(BH_Uptodate, &bh0->b_state); + if (!buffer_mapped(bh0)) { + status = + ext2_get_block(inode, ext2_cluster_block0(inode, cluster), bh0, + 0); + if (bh0->b_blocknr == 0) { + trace_e2c + ("ext2_decompress_pages: ext2_get_block error %ld (cluster = %u)\n", + status, cluster); + kfree(head); + memcpy(bh0->b_data, dp, bh0->b_size); + kfree(dp); + status = -EIO; + goto out; + } + } + ll_rw_block(READ, 1, &bh0); + + CHECK_NOT_ATOMIC + wait_on_buffer(bh0); + //printk("RE-Read: Buffer: blocknr:%lu(%#x) \n", bh0->b_blocknr, bh0); + if (!buffer_uptodate(bh0)) { /* Read error ??? */ + trace_e2c("ext2_decompress_pages: IO error (cluster = %u)\n", + cluster); + kfree(head); + memcpy(bh0->b_data, dp, bh0->b_size); + kfree(dp); + status = -EIO; + goto out; + } + /* This suppose that cluster are aligned with PAGE_SIZE... To be improved + bh0->b_data = page_address(pg[0]); */ + memcpy((char *) head, bh0->b_data, bh0->b_size); + memcpy(bh0->b_data, dp, bh0->b_size); + kfree(dp); + + if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) { + ext2_msg(inode->i_sb, + "ext2_decompress_pages: bad magic number", + "inode = %lu, magic = %#04x", inode->i_ino, + le16_to_cpu(head->magic)); + kfree(head); + status = -EIO; + goto out; + } +#ifdef EXT2_COMPR_REPORT + trace_e2c("ext2_decompress_pages: clt=%d i=%ld head=0x%x\n", cluster, + inode->i_ino, (unsigned) head); +#endif + + /* Now, try to do the same as in ext2_decompress_cluster for moving/allocating blocks */ + nbh = 0; + pagesPerCluster = ext2_cluster_npages(inode, cluster); + for (i = 0; i < pagesPerCluster && pg[i]; i++) { + assert(PageLocked(pg[i])); + //if (!(PageUptodate(pg[i]))) { + //mw: do it like ext2_decompress_cluster to handle end of a file correctly + if (!(PageUptodate(pg[i])) + && (pg[i]->index <= ((inode->i_size - 1) >> PAGE_CACHE_SHIFT))) { + kfree(head); + printk("should never happen: not all pages uptodate!\n"); //mw + status = 0; + goto out_x; + } + } + + for (i = 0; i < pagesPerCluster && pg[i]; i++) { + struct buffer_head *bhead, *bhx; + int idx = 0; + + /* assert(PageUptodate(pg[i])); with ftruncate() can be false */ + if (!page_has_buffers(pg[i])) { + ClearPageUptodate(pg[i]); /*mw */ + ClearPageDirty(pg[i]); /*mw */ + assert(0); + create_empty_buffers_e2c(pg[i], inode->i_sb->s_blocksize, 0, + inode); + if (unlikely(!page_has_buffers(pg[i]))) + printk("Error: NOMEM!\n"); + } + bhead = page_buffers(pg[i]); + for (bhx = bhead; bhx != bhead || !idx; bhx = bhx->b_this_page) { + idx++; + bh[nbh] = bhx; + nbh++; + } + } + + while ((nbh != 0) && (bh[nbh - 1]->b_blocknr == 0)) + --nbh; + + c_nblk = nbh; + + free_ix = + ext2_calc_free_ix(head->holemap_nbytes, (u8 const *) (&head[1]), + c_nblk); + clu_block0 = ext2_cluster_block0(inode, cluster); + clu_nblocks = ext2_cluster_nblocks(inode, cluster); + ext2_unpack_blkaddrs(inode, bh, 0, head->holemap_nbytes, + (u8 const *) (&head[1]), c_nblk, free_ix, + clu_block0, clu_nblocks); + + /* Allocate the extra blocks needed. 
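That is: the loop below walks the cluster's blocks from free_ix onward, skips those the holemap marks as holes, and maps each remaining block with ext2_get_block(..., create = 1), marking it uptodate and dirty for the write-back at the end, until the remaining ulen bytes of decompressed data are covered.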
*/ + data_left = size = le32_to_cpu(head->ulen); + + data_left -= c_nblk << inode->i_sb->s_blocksize_bits; + assert(data_left > 0); + for (i = 0; i < free_ix; i++) { + if (bh[i]->b_blocknr != 0) { +#ifdef EXT2_COMPR_REPORT_CPR + trace_e2c("\t [%d] blk=%ld sts=0x%x\n", i, bh[i]->b_blocknr, + (int) bh[i]->b_state); +#endif + set_buffer_dirty(bh[i]); + bh_writeout[bhn_writeout] = bh[i]; //mw + bhn_writeout++; //mw + } + } + + for (i = free_ix; i < clu_nblocks; i++) { + if (((i >> 3) >= head->holemap_nbytes) + || !(head->holemap[i >> 3] & (1 << (i & 7)))) { + status = + ext2_get_block(inode, clu_block0 + i, bh[i], + 1 /* create */ ); + if (status || bh[i]->b_blocknr == 0) { + status = -EIO; + goto out; + } +#ifdef EXT2_COMPR_REPORT_CPR + trace_e2c("\t [%d] blk=%ld sts=0x%x\n", i, bh[i]->b_blocknr, + (int) bh[i]->b_state); +#endif + set_bit(BH_Uptodate, &bh[i]->b_state); + set_buffer_dirty(bh[i]); + bh_writeout[bhn_writeout] = bh[i]; //mw + bhn_writeout++; //mw + nbh++; + data_left -= inode->i_sb->s_blocksize; + if (data_left <= 0) + break; + } + } + + out: + kfree(head); + + out_x: + + for (i = 0; i < bhn_writeout; i++) { + + if ((!buffer_mapped(bh_writeout[i])) + || (bh_writeout[i]->b_bdev == NULL)) { + u32 block = ext2_cluster_block0(inode, cluster); + ext2_get_block(inode, block + i, bh_writeout[i], 1); + //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh_writeout[i]->b_blocknr, (bh_writeout[i]->b_state & BH_Mapped), (bh_writeout[i]->b_page ? bh_writeout[i]->b_page->index : 0), bh_writeout[i]->b_bdev ); + } + assert(buffer_mapped(bh_writeout[i])); + assert(bh_writeout[i]->b_bdev != NULL); + assert(bh_writeout[i]->b_bdev == inode->i_sb->s_bdev); + /*if (bh_writeout[i]->b_bdev == NULL) + bh_writeout[i]->b_bdev = inode->i_sb->s_bdev; //fix bdev-bug */ + } + //Sync out changes: + ll_rw_block(WRITE, bhn_writeout, bh_writeout); + //mw: seems we have to wait here, otherwise: crash! + + CHECK_NOT_ATOMIC + for (i = 0; i < bhn_writeout; i++) { + if (bh_writeout[i]) + wait_on_buffer(bh_writeout[i]); + } + + +#ifdef CONFIG_HIGHMEM + if (kmapped) + ext2_kunmap_cluster_pages(NULL, pg, NULL); +#endif + + return (status ? status : size); +} + + +/* Decompress every cluster that is still compressed. + We stop and return -ENOSPC if we run out of space on device. + + The caller needs to check for EXT2_COMPRBLK_FL before calling. + + Returns 0 on success, -errno on failure. + + Called by ext2_ioctl(). */ +int ext2_decompress_inode(struct inode *inode) +{ + u32 cluster; + u32 n_clusters; + int err = 0; + struct ext2_inode_info *ei = EXT2_I(inode); + + assert(ei->i_flags & EXT2_COMPRBLK_FL); + + /* Quotas aren't otherwise kept if file is opened O_RDONLY. */ + dquot_initialize(inode); + + //mutex_lock(&inode->i_mutex); /* MW 5-16-07 */ + assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ + err = 0; + /* This test can succeed because down() (and I think DQUOT_INIT) can block. */ + if (!(ei->i_flags & EXT2_COMPRBLK_FL)) + goto out; + + n_clusters = ext2_n_clusters(inode); + for (cluster = 0; cluster < n_clusters; cluster++) { + err = ext2_cluster_is_compressed_fn(inode, cluster); + if (err > 0) { + err = ext2_decompress_cluster(inode, cluster); + /* If we later get an error, we'll need to recompress. 
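(EXT2_DIRTY_FL marks the inode as needing (re)compression, and EXT2_CLEANUP_FL makes a later ext2_put_inode()/ext2_release_file() invoke ext2_cleanup_compressed_inode(); both are cleared again below only if every cluster decompressed cleanly.)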
*/ + ei->i_flags |= EXT2_DIRTY_FL; + ei->i_compr_flags |= EXT2_CLEANUP_FL; + } + if (err < 0) + goto error; + } + assert(err >= 0); + err = 0; + ei->i_flags &= ~(EXT2_COMPRBLK_FL | EXT2_DIRTY_FL); + ei->i_compr_flags &= ~EXT2_CLEANUP_FL; + error: + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty_sync(inode); + out: +// mutex_unlock(&inode->i_mutex); /* MW 5-16-07 */ + return err; +} + + +/* + TODO: SECRM_FL + + TODO: Avant de liberer les blocs, regarder si le compteur + est a 1, et marquer le noeud si ce n'est pas le cas + (pour preparer la recompression immediate). + + pjm fixme translation. + "Before freeing the blocks, check if the counter is 1, + and mark the inode if not (in order to prepare for + immediate recompression)." */ + +/* This is called by ext2_compress_cluster to free the blocks now + available due to compression. We free ,nb` blocks beginning with + block ,block`. We set the address of each freed block to + EXT2_COMPRESSED_BLKADDR, thus marking the cluster as compressed. + N.B. It is up to the caller to adjust i_blocks. */ + +/* TODO: ext2_truncate() is much more careful than this routine. + (E.g. it checks for bh->b_count > 1, and checks for things changing + underneath it. It also calls bforget instead of brelse if it's + going to free it.) Why? Maybe we should copy it. */ + +/* effic: Reduce the number of calls to ext2_free_block() the way + ext2_trunc_direct() does. */ + +/* fixme: I think tht we do indeed need to check if buffers are held by + somebody else before freeing them. */ +static int ext2_free_cluster_blocks(struct inode *inode, u32 block, + unsigned nb) +{ + u32 tmp; + struct ext2_bkey key; + int err; + +/* + * whitpa 04 Oct 2004: although it may be true that using e2compr in + * conjunction with quotas is a Bad Idea, having quotas enabled for other + * filesystems doesn't necessarily mean that the quota feature will actually be + * used in this one, so many people find the following assertion very annoying. + * I have therefore disabled it. + */ +/* assert (!inode->i_sb->dq_op || (inode->i_flags & S_QUOTA)); */ + if (!nb) + return 0; + if (nb > EXT2_MAX_CLU_NBLOCKS) { + assert((int) nb >= 0); + assert(nb <= EXT2_MAX_CLU_NBLOCKS); + return -EDOM; + } + assert(((block + nb) & 3) == 0); + if (!ext2_get_key(&key, inode, block)) + return -EIO; + + while (nb-- > 0) { + tmp = ext2_get_key_blkaddr(&key); + err = ext2_set_key_blkaddr(&key, EXT2_COMPRESSED_BLKADDR); + if (err) + goto out; + if (tmp != 0) { + assert(tmp != EXT2_COMPRESSED_BLKADDR); +#ifdef EXT2_COMPR_REPORT_ALLOC + printk(KERN_DEBUG "ext2: free %d = (%d) %d:%d:%d:%d : %d\n", + key.block, + key.level, + key.off[0], key.off[1], key.off[2], key.off[3], tmp); +#endif + ext2_free_blocks(inode, tmp, 1); + } + if (!ext2_next_key(&key, 1)) + break; + } + err = 0; + out: + ext2_free_key(&key); + return err; +} + +#ifdef EXT2_COMPR_DEBUG +static unsigned count_bits(unsigned char *p, unsigned nb) +{ + u32 x = le32_to_cpu(*(u32 *) p); + unsigned n = 0; + + assert(nb <= 4); + if (nb != 4) + x &= (1 << (nb * 8)) - 1; + while (x) { + x &= (x - 1); + n++; + } + return n; +} +#endif + +/* + * __remove_compr_assoc_queue is used in invalidate_inode_buffers + * replacement code for ext2_compress_cluster(). TLL 02/21/07 + * Yeah, it is duplicate code, but using it does not require + * patching fs/buffer.c/__remove_assoc_queue to export it. + * The buffer's backing address_space's private_lock must be held. 
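(The helper below survives only as commented-out reference code; the live code instead marks the affected buffers dirty and flushes them explicitly with ll_rw_block() before waiting on them, as in ext2_decompress_cluster() above.)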
+ */ +/*static inline void __remove_compr_assoc_queue(struct buffer_head *bh) +{ + list_del_init(&bh->b_assoc_buffers); +}*/ + +/* Compress one cluster. If the cluster uses fewer blocks once + compressed, it is stored in place of the original data. Unused + blocks are freed, and the cluster is marked as compressed. + + Returns a negative value on error, + 0 if the cluster does not compress well, + positive if it is compressed (whether it was already compressed + or whether we compressed it). + + Assume inode is writable. + + Called by : + + ext2_cleanup_compressed_inode () [i_sem] + + If ever we acquire new callers, make sure that quotas are + initialised, and COMPRBLK is handled correctly (i.e. such + that ioctl() can't change the cluster size on us), and that caller + tests for ext2_wa==NULL. +*/ + +int ext2_compress_cluster(struct inode *inode, u32 cluster) +{ + struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS + 1]; + struct page *pg[EXT2_MAX_CLUSTER_PAGES]; + int s_nblk; /* Equals clu_nblocks less any trailing hole blocks. */ + unsigned u_nblk = (~(unsigned) 0), c_nblk; /* Number of blocks occupied by + un/compressed data. */ + int result, n, x; + int ulen, maxlen = 0, clen = 0; + unsigned char *dst; + u8 *src; + unsigned meth, alg; + int nbh = 0, npg, i; + unsigned char holemap_nbytes = 0; + unsigned last_hole_pos; + struct ext2_cluster_head *head; + unsigned r_nblk; + struct ext2_inode_info *ei = EXT2_I(inode); + unsigned long saved_isize; + //int dotrunc = 1; //mw + +#ifdef CONFIG_HIGHMEM + int kmapped = 0; +#endif + + /* impl: Otherwise, ioctl() could change the cluster size + beneath us. */ + /* TLL say not compressed and return -1 6-15-07 */ + if (!(ei->i_flags & EXT2_COMPRBLK_FL)) + return -1; + + //mw + saved_isize = inode->i_size; + + assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ + assert(!mapping_mapped(inode->i_mapping)); + + npg = ext2_cluster_npages(inode, cluster); + + result = ext2_get_cluster_pages(inode, cluster, pg, NULL, 1); + if (result <= 0) + goto done; + +#ifdef CONFIG_HIGHMEM + ext2_kmap_cluster_pages(NULL, pg, NULL); + kmapped = 1; +#endif + + /* effic: We ought to use the page cache. Using the page + cache always costs extra CPU time, but saves I/O if the + page is present. We still need to detect holes, which + unfortunately may still cause I/O. Testing for all-zero + could save us that I/O. */ + + nbh = ext2_get_cluster_blocks(inode, cluster, bh, pg, NULL, 1); + + s_nblk = nbh; + +#ifdef EXT2_COMPR_REPORT + { + int i; + trace_e2c(" > > > ext2_compress_cluster %d: inode=%ld, size=%d\n", + cluster, inode->i_ino, (int) inode->i_size); +#ifdef EXT2_COMPR_REPORT_CPR + for (i = 0; i < s_nblk; i++) { + if (bh[i]) { + printk(KERN_DEBUG + "bbuffer_head[%d]: blocknr=%lu, addr=0x%p ", i, + (unsigned long) bh[i]->b_blocknr, bh[i]); + if (bh[i]->b_page) + printk(KERN_DEBUG "bgn:[page->index=%ld]\n", + bh[i]->b_page->index); + else + printk(KERN_DEBUG "[No page]\n"); + } else + printk("bbuffer_head[%d] is NULL\n", i); + } +#endif + } +#endif + /* + * Did somebody else compress the cluster while we were waiting ? + * This should never arise ... + */ + result = ext2_cluster_is_compressed_fn(inode, cluster); + if (result != 0) { + if (result > 0) { + ext2_msg(inode->i_sb, + "ext2_compress_cluster", + "compressing compressed cluster"); + } + goto done; + } + + /* I moved it here in case we need to load a module that + * needs more heap that is currently allocated. + * In such case "init_module" for that algorithm forces + * re-allocation of ext2_wa. 
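A minimal sketch of the per-CPU working-area discipline used here (and with ext2_rd_wa in ext2_decompress_cluster()); get_cpu_var() pins the current CPU and disables preemption until the matching put_cpu_var():

    if (get_cpu_var(ext2_wr_wa) == NULL)
        ext2_alloc_wr_wa();                  allocated on first use with vmalloc()
    dst = __get_cpu_var(ext2_wr_wa)->u;      uncompressed data is staged here
    ... compress into __get_cpu_var(ext2_wr_wa)->c ...
    put_cpu_var(ext2_wr_wa);                 re-enables preemption

Between the get and the put the code may not schedule, and every error path below is careful to call put_cpu_var() before bailing out.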
It should be safe here b/c the + * first reference to ext2_wa comes just after and we have + * locked ext2_wa before. + * + * I know that we may not need the compression at all + * (compressing 0 or 1 block) but it's better to sacrifice + * a bit than do make a total mess of this code. + * + * FIXME: Totally separate working areas for reading and writing. + * Jan R. + */ + + meth = ei->i_compr_method; + assert(meth < EXT2_N_METHODS); + alg = ext2_method_table[meth].alg; +#ifdef CONFIG_KMOD + if (!ext2_algorithm_table[alg].avail) { + char str[32]; + + sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name); + request_module(str); + } +#endif + + result = -EINTR; + + /* + * Try to get the working area. + */ +#ifdef EXT2_COMPR_REPORT_WA + printk(KERN_DEBUG "pid %d enters critical region\n", current->pid); +#endif + if (get_cpu_var(ext2_wr_wa) == NULL) + { + ext2_alloc_wr_wa(); + } + assert(__get_cpu_var(ext2_wr_wa) != NULL); + + + /* + * Now, we try to compress the cluster. If the cluster does + * not compress well, we just give up. Otherwise, we reuse + * the old blocks to store the compressed data (except that + * compressed data is contiguous in the file even if the + * uncompressed data had holes). + */ + + /* + * Compute the block bitmap, how many bytes of data we have + * in the cluster, and the maximum interesting length after + * compression. The bitmap will be used to reallocate blocks + * when decompressing the cluster, so that we don't create blocks + * that were previously missing. We also pack the buffers + * together. + */ + + head = (struct ext2_cluster_head *) __get_cpu_var(ext2_wr_wa)->c; +#if EXT2_MAX_CLUSTER_BLOCKS > 32 +# error "We need to zero more bits than this." +#endif + *(u32 *) (&head[1]) = 0; + last_hole_pos = (unsigned) (-1); + assert(head->holemap[0] == 0); + assert(head->holemap[1] == 0); + assert(head->holemap[2] == 0); + assert(head->holemap[3] == 0); + assert(*(u32 *) head->holemap == 0); + assert(count_bits(head->holemap, 4) == 0); + + /* TODO: Check that i_size can't change beneath us. + do_truncate() is safe because it uses i_sem around changing + i_size. For the moment, I do a runtime check. */ + + saved_isize = inode->i_size; + +#ifdef EXT2_COMPR_REPORT_VERBOSE + printk + ("00 ext2_compress_cluster[%u]: i_size=%u, s_blocksize_bits=%u, s_nblk=%u\n", + __LINE__, (unsigned) inode->i_size, inode->i_sb->s_blocksize_bits, + s_nblk); +#endif +// assert (ROUNDUP_RSHIFT(inode->i_size, inode->i_sb->s_blocksize_bits) +// >= s_nblk); + /* This initial guess at ulen doesn't take holes into account + unless they're at end of cluster. We ,compensate for other + holes` during the loop below. */ + ulen = MIN(s_nblk << inode->i_sb->s_blocksize_bits, + inode->i_size - ext2_cluster_offset(inode, cluster)); + r_nblk = (((ulen - 1) >> inode->i_sb->s_blocksize_bits) + 1); + if (r_nblk <= 1) { + /* MW: required to remove Z flag, otherwise compress + * is tried on each access */ + result = 0; + goto no_compress; + } + /* Verify if more than 1 block to compress in the cluster */ + nbh = 0; + for (x = 0; x < s_nblk; x++) { + if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) { + nbh++; + } else { + last_hole_pos = x; + head->holemap[x >> 3] |= 1 << (x & 7); + ulen -= inode->i_sb->s_blocksize; + /* impl: We know that it's a whole block because + ext2_get_cluster_blocks trims s_nblk for trailing + NULL blocks, and partial blocks only come at + the end, so there can't be partial NULL blocks. 
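Example: with 4 KB blocks, a hole at cluster-relative block 5 executes head->holemap[5 >> 3] |= 1 << (5 & 7), i.e. sets bit 5 of holemap[0], and ulen shrinks by the 4096 bytes that block never contained.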
*/ + } + } + /* We don't try to compress cluster that only have one block + or no block at all. (When fragments are implemented, this code + should be changed.) */ + if (nbh <= 1) { + /* MW: required to remove Z flag, otherwise compress + * is tried on each access */ + goto no_compress; + } + + u_nblk = nbh; + /* Copy the data in the compression area */ + dst = __get_cpu_var(ext2_wr_wa)->u; + for (x = 0; x < s_nblk; x++) { + if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) { + restore_b_data_himem(bh[x]); + memcpy(dst, bh[x]->b_data, bh[x]->b_size); + dst += bh[x]->b_size; + } + } + + assert(count_bits(head->holemap, 4) == s_nblk - u_nblk); + +#if EXT2_GRAIN_SIZE != EXT2_MIN_BLOCK_SIZE +# error "this code ought to be changed" +#endif + + /* ,maxlen` is the maximum length that the compressed data can + be while still taking up fewer blocks on disk. */ + holemap_nbytes = (last_hole_pos >> 3) + 1; + /* impl: Remember that ,last_hole_pos` starts off as being -1, + so the high 3 bits of ,last_hole_pos >> 3` can be wrong. + This doesn't matter if holemap_nbytes discards the high + bits. */ + + assert(sizeof(holemap_nbytes) < sizeof(unsigned)); + assert((last_hole_pos == (unsigned) -1) + == (holemap_nbytes == 0)); + maxlen = + ((((r_nblk < + u_nblk) ? r_nblk : u_nblk) - 1) * inode->i_sb->s_blocksize - + sizeof(struct ext2_cluster_head) + - holemap_nbytes); + clen = 0; + /* Handling of EXT2_AUTO_METH at the moment is just that we + use the kernel default algorithm. I hope that in future + this can be extended to the kernel deciding when to + compress and what algorithm to use, based on available disk + space, CPU time, algorithms currently used by the fs, + etc. */ + if ((meth == EXT2_AUTO_METH) + || !ext2_algorithm_table[alg].avail) { + meth = EXT2_DEFAULT_COMPR_METHOD; + alg = ext2_method_table[meth].alg; + assert(ext2_algorithm_table[alg].avail); + } + if (alg == EXT2_NONE_ALG) + goto no_compress; + + clen = ext2_algorithm_table[alg].compress(__get_cpu_var(ext2_wr_wa)->u, + __get_cpu_var(ext2_wr_wa)->c + sizeof(struct ext2_cluster_head) + holemap_nbytes, + __get_cpu_var(ext2_wr_wa)->heap, ulen, maxlen, ext2_method_table[meth].xarg); + +#ifdef EXT2_COMPR_REPORT_ALGORITHMS + printk(KERN_DEBUG "03 ext2: %lu: cluster %d+%d [%d] compressed " + "into %d bytes (ulen = %d, maxlen = %d)\n", + inode->i_ino, + ext2_cluster_offset(inode, cluster), + ext2_cluster_nblocks(inode, cluster), + u_nblk, clen, ulen, maxlen); +#endif + + if ((clen == 0) || (clen > maxlen)) { + no_compress: + + /* this chunk didn't compress. */ + assert(inode->i_size == saved_isize); +#ifdef EXT2_COMPR_REPORT_WA + printk(KERN_DEBUG + "pid %d leaves critical region, nbh=%d, u_nblk=%d, " + "inode->i_size=%lu, saved_isize=%lu, clen=%d, ulen=%d, maxlen=%d\n", + current->pid, nbh, u_nblk, + (long unsigned) inode->i_size, saved_isize, clen, ulen, + maxlen); +#endif + + result = 0; + put_cpu_var(ext2_wr_wa); + goto done; + } + + +#if EXT2_MAX_CLUSTER_BLOCKS > 32 +# error "We need to zero more bits than this." +#endif + assert(-1 <= (int) last_hole_pos); + assert((int) last_hole_pos < 32); + assert((le32_to_cpu(*(u32 *) head->holemap) + & (~0u << (1 + last_hole_pos)) + & (~(~0u << (8 * holemap_nbytes)))) + == 0); + /* Don't change "~0u << (1 + last_hole_pos)" to "~1u << last_hole_pos" + as I almost did, as last_hole_pos can be -1 and cannot be 32. 
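Concretely: with no holes, last_hole_pos is still (unsigned)(-1), so the shift count 1 + last_hole_pos wraps to 0 and ~0u << 0 keeps every bit, while holemap_nbytes == 0 zeroes the other mask, so the assertion below still holds; the tempting ~1u << last_hole_pos would shift by -1 in that case, which C leaves undefined.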
*/ + assert(count_bits(head->holemap, holemap_nbytes) == s_nblk - u_nblk); + + /* Compress the blocks at the beginning of the cluster */ + for (x = 0, nbh = 0; x < s_nblk; x++) { + if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) { + if (nbh != x) { + restore_b_data_himem(bh[x]); + bh[nbh]->b_blocknr = bh[x]->b_blocknr; + set_bit(BH_Mapped, &bh[nbh]->b_state); + bh[x]->b_blocknr = 0; + assert(buffer_mapped(bh[x])); + clear_bit(BH_Mapped, &bh[x]->b_state); + } + nbh++; + } + } + assert(nbh == u_nblk); + assert(count_bits(head->holemap, holemap_nbytes) == s_nblk - u_nblk); + + /* + * Compression was successful, so add the header and copy to blocks. + */ + + /* Header. */ + { + head->magic = cpu_to_le16(EXT2_COMPRESS_MAGIC_04X); + head->method = meth; + head->holemap_nbytes = holemap_nbytes; + head->ulen = cpu_to_le32(ulen); + head->clen = cpu_to_le32(clen); + + barrier(); //mw: "barrier" tells compiler not to re-order resulting asm statments, somehow. + head->checksum = + cpu_to_le32(ext2_adler32 + (le32_to_cpu(*(u32 *) __get_cpu_var(ext2_wr_wa)->c), + __get_cpu_var(ext2_wr_wa)->c + 8, + (sizeof(struct ext2_cluster_head) - 8 + + head->holemap_nbytes + clen))); + } + + assert((le32_to_cpu(*(u32 *) head->holemap) + & (~0 << (1 + last_hole_pos)) + & ((1 << (8 * holemap_nbytes)) - 1)) == 0); + result = clen += sizeof(struct ext2_cluster_head) + holemap_nbytes; + c_nblk = ROUNDUP_RSHIFT(clen, inode->i_sb->s_blocksize_bits); + + /* Release unneeded buffer heads. (Freeing is done later, + after unlocking ext2_wr_wa.) */ + assert(nbh == u_nblk); + nbh = c_nblk; + +#ifdef EXT2_COMPR_REPORT + trace_e2c("ext2_compress_cluster: head->clen=%d, clen=%d\n", head->clen, clen); +#endif + src = __get_cpu_var(ext2_wr_wa)->c; + + for (n = 0; (int) clen > 0; n++) { + restore_b_data_himem(bh[n]); + if (clen >= inode->i_sb->s_blocksize) { + memcpy(bh[n]->b_data, src, inode->i_sb->s_blocksize); + } else { + memcpy(bh[n]->b_data, src, clen); + } + + /* TO_DO: OSYNC. means: write opertions are blocking until the + * the pages are written from page cache to disk */ + + set_buffer_uptodate(bh[n]); + set_buffer_dirty(bh[n]); + src += inode->i_sb->s_blocksize; + clen -= inode->i_sb->s_blocksize; + } + + i = 0; + assert(n == c_nblk); + assert((le32_to_cpu(*(u32 *) head->holemap) + & (~0 << (1 + last_hole_pos)) + & ((1 << (8 * holemap_nbytes)) - 1)) == 0); + + /* Runtime check that no-one can change i_size while i_sem is down. + (See where saved_isize is set, above.) */ + assert(inode->i_size == saved_isize); + assert(!mapping_mapped(inode->i_mapping)); + + /* Free the remaining blocks, and shuffle used blocks to start + of cluster in blkaddr array. */ + { + u32 free_ix, curr; + int err; + + /* Calculate free_ix. There should be ,c_nblk` + non-hole blocks among the first ,free_ix` + blocks. 
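Example: say the compressed data needs c_nblk = 3 blocks and the holemap marks only block 1 as a hole (last_hole_pos = 1). The loop below visits positions 0 and 1 and consumes one non-hole (n: 3 -> 2), then free_ix = 2 + 2 = 4: among the first four block slots, exactly the three non-holes (0, 2, 3) carry compressed data, and everything from slot 4 onward can be freed.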
*/ + { + assert((le32_to_cpu(*(u32 *) head->holemap) + & (~0 << (1 + last_hole_pos)) + & ((1 << (8 * holemap_nbytes)) - 1)) == 0); + assert(n == c_nblk); + for (free_ix = 0; + ((int) free_ix <= (int) last_hole_pos) && (n > 0); + free_ix++) + if (!(head->holemap[free_ix >> 3] + & (1 << (free_ix & 7)))) + n--; + free_ix += n; + + if ((free_ix < c_nblk) + || (free_ix + u_nblk > s_nblk + c_nblk) + || (free_ix >= ext2_cluster_nblocks(inode, cluster)) + || ((holemap_nbytes == 0) && (c_nblk != free_ix))) { + assert(free_ix >= c_nblk); + /*assert (free_ix - c_nblk <= s_nblk - u_nblk); */ + assert(free_ix + u_nblk <= s_nblk + c_nblk); + assert(free_ix < ext2_cluster_nblocks(inode, cluster)); + assert((holemap_nbytes != 0) || (c_nblk == free_ix)); + assert(1 <= c_nblk); + assert(c_nblk < u_nblk); + assert(u_nblk <= s_nblk); + assert(s_nblk <= ext2_cluster_nblocks(inode, cluster)); + assert(ext2_cluster_nblocks(inode, cluster) <= + EXT2_MAX_CLU_NBLOCKS); + ext2_error(inode->i_sb, "ext2_compress_cluster", + "re assertions: c=%d, u=%d, f=%d, s=%d, n=%d, " + "lhp=%d, hm=%x, hnb=%d, " "ino=%lu, clu=%u", + (int) c_nblk, (int) u_nblk, (int) free_ix, + (int) s_nblk, (int) ext2_cluster_nblocks(inode, + cluster), + (int) last_hole_pos, + (unsigned) le32_to_cpu(*(u32 *) head->holemap), + (int) holemap_nbytes, inode->i_ino, cluster); + } + } + + /*mw: put here: set all __get_cpu related pointers to NULL + as they become invalid with put_cpu */ + head = NULL; /* prevent any more stupid bugs */ + src = NULL; + dst = NULL; + put_cpu_var(ext2_wr_wa); + +#ifdef EXT2_COMPR_DEBUG + /* TODO: remove this TEST */ + /* mw: ext2_free_cluster_blocks can sleep: check we are not atomic */ + schedule(); +#endif + + /* Free unneeded blocks, and mark cluster as + compressed. */ + err = ext2_free_cluster_blocks + (inode, + ext2_cluster_block0(inode, cluster) + free_ix, + ext2_cluster_nblocks(inode, cluster) - free_ix); + /* pjm 1998-06-15: This should help reduce fragmentation. + Actually, we could set block to clu_block0 + clu_nbytes, + and goal to the last allocated blkaddr in the compressed + cluster. + It would be nice if we would transfer the freed blocks + to preallocation, while we're at it. */ +// write_lock(&ei->i_meta_lock); + /* mw: i_next_alloc_goal and i_next_alloc_block were removed in 2.6.24.x + * so we dont need to set them to 0 (they are anyway, somehow). + */ + //ei->i_next_alloc_goal = ei->i_next_alloc_block = 0; +// write_unlock(&ei->i_meta_lock); + if (err < 0) { + goto done; + } + /* Note that ext2_free_cluster_blocks() marks the + cluster as compressed. */ + + /* Shuffle used blocks to beginning of block-number array. */ + { + struct ext2_bkey key; + unsigned i; + + if (!ext2_get_key(&key, + inode, + ext2_cluster_block0(inode, cluster))) { + ei->i_flags |= EXT2_ECOMPR_FL; + result = -EIO; + free_ix = 0; + } + for (i = 0; i < free_ix; i++) { + curr = ext2_get_key_blkaddr(&key); + + if ((c_nblk == free_ix) + && (curr != bh[i]->b_blocknr)) { + /* "Can't happen", yet has + happened a couple of times. */ + ext2_error(inode->i_sb, "ext2_compress_cluster", + "c_nblk=free_ix=%d, " + "curr=%u, b_blocknr=%lu, " + "lhp=%d , hm=, " + "ino=%lu, blk=%u", + c_nblk, curr, + (unsigned long) bh[i]->b_blocknr, + (int) last_hole_pos, + /*mw: became invalid due put_cpu: + (unsigned) le32_to_cpu(*(u32 *) head-> + holemap),*/ + inode->i_ino, + (unsigned) + ext2_cluster_block0(inode, cluster) + i); + } + err = ext2_set_key_blkaddr(&key, + (i < c_nblk + ? 
bh[i]->b_blocknr + : EXT2_COMPRESSED_BLKADDR)); + if (err) + break; + if (!ext2_next_key(&key, 1)) { + ei->i_flags |= EXT2_ECOMPR_FL; /* sorry... */ + result = -EIO; + break; + } + } + ext2_free_key(&key); + } + } + + /* + * Unlock the working area. + */ + +#ifdef EXT2_COMPR_REPORT_WA + printk(KERN_DEBUG "pid %d leaves critical region\n", current->pid); +#endif + + assert(c_nblk < u_nblk); + ext2_mark_algorithm_use(inode, alg); + + /* TLL update b_assoc_map per 2.6.20 6-07-07 */ + for (i = 0; i < c_nblk; i++) + if (bh[i] != NULL) { + bh[i]->b_assoc_map = inode->i_mapping; + bh[i]->b_page->mapping = inode->i_mapping; //Andreas 5-24-07 : necessary? WRONG? + } + //mw: we must force the writeback, otherwise ext2_readpage will get confused + // yaboo ding had similiar code above. but I think it makes more sense after + // the block shuffeling. + // Note: generic_oysnc_inode() made trouble with USB-Sticks and caused a lot + // of IO, stalled system ... therefore ll_rw_block() replace it. Anyway we already operate + // with this low-level function. + + /*mw: new "hole" fix. hole == bdev bug! */ + for (i = 0; i < c_nblk; i++) { + + /* this was a hole (uncompressed) + * at the beginning of the cluster. + * so NO block was yet associated with it. + * But now we need it, because a compressed + * cluster always starts at the cluster.*/ + if (!buffer_mapped(bh[i]) || bh[i]->b_bdev == NULL) { + u32 block = ext2_cluster_block0(inode, cluster); + ext2_get_block(inode, block + i, bh[i], 1); + //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh[i]->b_blocknr, (bh[i]->b_state & BH_Mapped), (bh[i]->b_page ? bh[i]->b_page->index : 0), bh[i]->b_bdev ); + } + assert(buffer_mapped(bh[i])); + assert(bh[i]->b_bdev != NULL); + assert(bh[i]->b_bdev == inode->i_sb->s_bdev); + } + + ll_rw_block(WRITE, c_nblk, bh); + + CHECK_NOT_ATOMIC + //mw: seems we have to wait here, otherwise: crash! + for (i = 0; i < c_nblk; i++) { + if (bh[i]) + wait_on_buffer(bh[i]); + //printk("written compressed block: Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh[i]->b_blocknr, (bh[i]->b_state & BH_Mapped), (bh[i]->b_page ? bh[i]->b_page->index : 0), bh[i]->b_bdev ); + } + + +#ifdef CONFIG_HIGHMEM + if (kmapped) + ext2_kunmap_cluster_pages(NULL, pg, NULL); +#endif + + inode->i_ctime = CURRENT_TIME; //mw: these two come always together. So I also put it here. + mark_inode_dirty_sync(inode); + + //ext2_update_inode(inode, inode_needs_sync(inode)); //mw: might be able to fix pipe_write vs. readpage. mutex-rec-locking + + /* COMPRBLK is already high, so no need to raise it. */ + { + for (i = c_nblk; (i < EXT2_MAX_CLUSTER_BLOCKS) && (bh[i] != NULL); + i++) { + clear_buffer_dirty(bh[i]); + bh[i]->b_blocknr = 0; + clear_bit(BH_Mapped, &bh[i]->b_state); + clear_bit(BH_Uptodate, &bh[i]->b_state); + } + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (pg[i] == NULL) { + break; + } + assert(PageLocked(pg[i])); + ClearPageUptodate(pg[i]); + unlock_page(pg[i]); + page_cache_release(pg[i]); + } + + /* invalidate_inode_buffers replacement code: TLL 02/21/07 + * e2compr on post 2.6.10 kernels do not have an uptodate + * mapping->assoc_mapping (other Vm(?) changes require it be + * made explicit, 2.4 kernels have it implicit). Therefore, when + * umount is called, a GPF ensues from a NULL ops pointer. + * e2c on a USB thumbdrive mounted as the root fs does not + * support repeated compress/uncompress cycles on a given file. + * Inlined the flush list code to explicityly force update to + * disk with a known valid bh list. 
+ */ + + /* mw: I consider this code as ... not so good! */ + /* + if (inode_has_buffers(inode)) { + //struct address_space *mapping = &inode->i_data; + // struct address_space *buffer_mapping = mapping->assoc_mapping; + // requires: inode->i_data->mapping->assoc_mapping; to be set + invalidate_inode_buffers(inode); // TLL do it proper 5-25-07 + //if (dotrunc) + //ext2_truncate(inode); // TLL file size hack 6-19-07 + } + */ + + } +#ifdef EXT2_COMPR_REPORT + trace_e2c(" < < < ext2_compress_cluster %i: [done cpr] inode=%ld\n", cluster, inode->i_ino); +#endif + return result; + + + done: + +#ifdef CONFIG_HIGHMEM + if (kmapped) + ext2_kunmap_cluster_pages(NULL, pg, NULL); +#endif + + { + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (pg[i] == NULL) { + break; + } + unlock_page(pg[i]); + page_cache_release(pg[i]); + } + /* TLL cp to compr dir bug fix 03-25-07 + Truncate uncompressed files to their uncompressed + length, i.e. force kernel to update inode and sb */ + //if(dotrunc) + //26.08.2011: ext2_truncate(inode) does not exist anymore + ext2_truncate_blocks(inode, inode->i_size); + + } +#ifdef EXT2_COMPR_REPORT_VERBOSE + { + int i; + + printk(KERN_DEBUG "ext2_compress_cluster[end]: buffers kept for cluster=%d\n", cluster); + for (i = 0; i < nbh; i++) { + if (bh[i]) { + printk(KERN_DEBUG "2buffer_head[%d]: blocknr=%lu, addr=0x%p ", i, (unsigned long) bh[i]->b_blocknr, bh[i]); + if (bh[i]->b_page) + printk(KERN_DEBUG "2:[page->index=%ld]\n", bh[i]->b_page->index); + else + printk(KERN_DEBUG "[No page]\n"); + } else + printk(KERN_DEBUG "buffer_head[%d] is NULL\n", i); + } + } +#endif + +#ifdef EXT2_COMPR_REPORT + trace_e2c(" < < < ext2_compress_cluster %i: [done NO cpr] inode=%ld\n", cluster, inode->i_ino); +#endif + return result; +} + + +/* Go through all the clusters and compress them if not already + compressed. + + This is called by ext2_put_inode() and ext2_release_file(). Later, + we may have ext2_ioctl() call it (when EXT2_COMPR_FL rises). None + of the callers does any locking, so we do it here. + + Neither of the current callers uses the return code, but we get ready + for if we start using it. + + Returns 0 on "success" (whether or not we cleared EXT2_CLEANUP_FL + or EXT2_DIRTY_FL bits), -errno on error. */ +int ext2_cleanup_compressed_inode(struct inode *inode) +{ + u32 cluster; + u32 n_clusters; + int dirty = 0; + int err = 0; + u32 comprblk_mask; + atomic_t start_i_count = inode->i_count; + int retry = 0; + int have_downed; + struct ext2_inode_info *ei = EXT2_I(inode); +#ifdef EXT2_COMPR_REPORT + char bdn[BDEVNAME_SIZE]; +#endif + + /* impl: Actually, this assertion could fail if the kernel + isn't locked. I haven't looked, but I suppose that the + kernel always is locked when this is called. */ + assert(ei->i_compr_flags & EXT2_CLEANUP_FL); + +#ifdef EXT2_COMPR_REPORT_PUT + printk(KERN_DEBUG "ext2_cleanup_compressed_inode() called for pid %d; " + "dev=%s, ino=%lu, i_state=0x%lx, i_count=%u\n", + current->pid, bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino, + inode->i_state, atomic_read(&inode->i_count)); +#endif + + /* Do these tests twice: once before down() and once after. */ + for (have_downed = 0;; have_downed++) { + if ((ei->i_flags & (EXT2_COMPR_FL | EXT2_DIRTY_FL)) + != (EXT2_COMPR_FL | EXT2_DIRTY_FL)) { + if (have_downed) + goto out; + /* TLL 5-25-07 changed from a warning to trace */ + /*trace_e2c("ext2_cleanup_compressed_inode: trying to un/compress an " + "uncompressable file.\n" + "i_flags=%#x. 
(dev=%s, ino=%lu, down=%d)\n", + ei->i_flags, bdevname(inode->i_sb->s_bdev, bdn), + inode->i_ino, have_downed); */ + return 0; + } + + /* test if file is mapped by mmap */ + if (mapping_mapped(inode->i_mapping)) + { + //trace_e2c("ext2_cleanup_compressed_inode: (dev. %s): ino=%ld: file mapped, does not compress cluster\n", bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino); + if (have_downed) + goto out; + else + return 0; + } + + if (IS_RDONLY(inode) + || (ei->i_flags & EXT2_ECOMPR_FL)) { + ei->i_compr_flags &= ~EXT2_CLEANUP_FL; + if (have_downed) + goto out; + else + return 0; + } + + //mw + if (ext2_get_dcount(inode) > 1) { + err = 0; + //printk("Compress: file busy (dcount: %i>1)\n", ext2_get_dcount(inode)); + if (have_downed) + goto out; + else + return 0; + } + + if (have_downed) + break; + + /* Quotas aren't otherwise kept if file is opened O_RDONLY. */ + dquot_initialize(inode); + + /* Check whether OSYNC of inode is acutally running */ + //if (ei->i_compr_flags & EXT2_OSYNC_INODE) + //printk(KERN_DEBUG "OSYNC!\n"); + + /* I think: + * checking these flags should prevent that one Process aquires the MUTEX again, + * e.g. in a recursive call + * BUT: what happens acutally: two processes are working on this inode: pdflush and the userprogramm + * SO: the check might be correct if: ei->i_compr_flags & EXT2_OSYNC_INOD AND the same process already posesses this lock!!! + */ + //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) + //{ + mutex_lock(&inode->i_mutex); +#ifdef EXT2_COMPR_REPORT_MUTEX + printk(KERN_DEBUG "CLEANUP_LOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino); +#endif + //} + } + n_clusters = ext2_n_clusters(inode); + +#ifdef EXT2_COMPR_REPORT_PUT + printk(KERN_DEBUG "ext2: inode:%lu: put compressed, clusters = %d, flags = %x, pid = %u\n", + inode->i_ino, n_clusters, ei->i_flags, current->pid); +#endif + + assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ + + /* Try to compress the clusters. We clear EXT2_DIRTY_FL only + if we looked at every cluster and if there was no error. */ + + /* impl: We raise EXT2_COMPRBLK_FL now so that ext2_ioctl() + doesn't try to change the cluster size beneath us. If need + be, we restore the bit to its original setting before + returning. Note that no-one else can _change_ + EXT2_COMPRBLK_FL while we work because i_sem is down. */ + /* impl: Note what's happening here with comprblk_mask. The + current state of COMPRBLK_FL (before we start) is that + (comprblk == 1) || (no compressed clusters). At the end of + the procedure, comprblk == one if (at least one compressed + cluster, or an error occurred preventing us from finding + out). */ + comprblk_mask = ~EXT2_COMPRBLK_FL | ei->i_flags; + ei->i_flags |= EXT2_COMPRBLK_FL; + + for (cluster = 0; cluster < n_clusters; cluster++) { + if (atomic_read(&inode->i_count) > atomic_read(&start_i_count)) { + /* This is a poor way of doing this (and doubly + poor now that the only users of i_count are + the dentries), but the idea is not to + compress things tht are likely to be + decompressed soon. I guess a better way of + doing this would be just to make sure tht + the stuff is in the page cache. 
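(When that happens, retry stays set, EXT2_CLEANUP_FL is deliberately left high, and the next ext2_put_inode()/ext2_release_file() gets another chance at the remaining clusters.)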
*/ + retry = 1; + break; + } + err = ext2_cluster_is_compressed_fn(inode, cluster); + if (err == 0) { + //mw: ext2_compress_cluster might clean EXT2_COMPRBLK_FL, therefore raise it for every new cluster + ei->i_flags |= EXT2_COMPRBLK_FL; + + err = ext2_compress_cluster(inode, cluster); + if (err < 0) + dirty = 1; + else if (err > 0) + comprblk_mask = ~0ul; + } else if (err < 0) + break; + else { + err = 0; + assert(comprblk_mask == ~0ul); /* i.e. that EXT2_COMPRBLK_FL was high. */ + } + } + + if ((cluster >= n_clusters) && !dirty) + ei->i_flags &= ~EXT2_DIRTY_FL; + if (!retry) { + ei->i_compr_flags &= ~EXT2_CLEANUP_FL; + ei->i_flags &= comprblk_mask; + } + + /* We clear EXT2_CLEANUP_FL because, otherwise, we'll get + called again almost immediately. */ + + /* + * The CLEANUP flag *MUST* be cleared, otherwise the iput routine + * calls ext2_put_inode() again (because i_dirt is set) and there + * is a loop. The control scheme (CLEANUP + DIRTY flags) could + * probably be improved. On the other hand, i_dirt MUST be set + * because we may have sleeped, and we must force the iput routine + * to look again at the i_count ... + */ + /* TODO: Have a look at this cleanup scheme. The above + comment sounds wrong. */ + + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty_sync(inode); + out: + +#ifdef EXT2_COMPR_REPORT_MUTEX + printk(KERN_DEBUG "CLEANUP_UNLOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino); +#endif + +// if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { /* MW 5-16-07 */ + mutex_unlock(&inode->i_mutex); +// } /* MW 5-16-07 */ + return err; /* TODO: Check that ,err` is appropriate. */ +} + + +int ext2_recognize_compressed(struct inode *inode, unsigned cluster) +{ + /* ext2_recognize_compressed(): Check tht the cluster is valid + in every way, and then do the EXT2_COMPRESSED_BLKADDR + thing. */ + /* nyi, fixme. All of the userspace stuff (EXT2_NOCOMPR_FL + etc.) needs work, so I might as well leave this. See + ioctl.c for a description of what it's supposed to do. */ + return -ENOSYS; +} + + +/* Look for compressed clusters. If none, then clear EXT2_COMPRBLK_FL. + + Called by: + ext2_truncate(). + */ +void ext2_update_comprblk(struct inode *inode) +{ + unsigned block, last_block; + struct ext2_bkey key; + struct ext2_inode_info *ei = EXT2_I(inode); + + assert(ei->i_flags & EXT2_COMPRBLK_FL); + if (inode->i_size == 0) { + ei->i_flags &= ~EXT2_COMPRBLK_FL; + trace_e2c("ext2_update_comprblk 1: inode: %lu removed EXT2_COMPRBLK_FL!\n", inode->i_ino); + return; + } + last_block = ROUNDUP_RSHIFT(inode->i_size, + inode->i_sb->s_blocksize_bits) - 1; + block = ext2_first_cluster_nblocks(inode) - 1; + + assert(atomic_read(&inode->i_mutex.count) <= 0); + + if (!ext2_get_key(&key, inode, block)) + return; + for (;;) { + if (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR) + goto out; + if (block >= last_block) + goto clear; + if (!ext2_next_key(&key, ei->i_clu_nblocks)) + goto out; + block += ei->i_clu_nblocks; + } + clear: + trace_e2c("ext2_update_comprblk 2: inode: %lu removed EXT2_COMPRBLK_FL!\n", inode->i_ino); + ei->i_flags &= ~EXT2_COMPRBLK_FL; + out: + ext2_free_key(&key); + assert(atomic_read(&inode->i_mutex.count) <= 0); + +} + + +/* + * allocate working areas + */ + +DEFINE_PER_CPU(struct ext2_wa_S *, ext2_rd_wa) = NULL; +DEFINE_PER_CPU(struct ext2_wa_S *, ext2_wr_wa) = NULL; + +/* SMP, setup wa's. 
caller must hold wa already via get_cpu_var */ +void ext2_alloc_rd_wa(){ + if ((__get_cpu_var(ext2_rd_wa) == NULL) ) { + size_t rsize = 2 * EXT2_MAX_CLUSTER_BYTES; //mw: just guessing + + __get_cpu_var(ext2_rd_wa) = vmalloc (rsize); + if (__get_cpu_var(ext2_rd_wa) == NULL) + printk ("EXT2-fs: can't allocate working area; compression turned off.\n"); + else { + printk ("ext2-compression: allocated read buffer for CPU%i at %p-%p (%zu bytes)\n", + get_cpu(), __get_cpu_var(ext2_rd_wa), (char *)__get_cpu_var(ext2_rd_wa) + rsize, rsize); +# ifdef EXT2_COMPR_REPORT_WA + printk (KERN_INFO "EXT2-fs: rd_wa=%p--%p (%d)\n", + ext2_rd_wa, (char *)ext2_rd_wa + rsize, rsize); +# endif + put_cpu(); + } + } +} + +void ext2_alloc_wr_wa(){ + + if ((__get_cpu_var(ext2_wr_wa) == NULL) ) { + size_t wsize = 2 * EXT2_MAX_CLUSTER_BYTES; //mw: just guessing + __get_cpu_var(ext2_wr_wa) = vmalloc (wsize); + + if (__get_cpu_var(ext2_wr_wa) == NULL) + printk ("EXT2-fs: can't allocate working area; " + "compression turned off.\n"); + else { + printk ("ext2-compression: allocated write buffer for CPU%i at %p-%p (%zu bytes)\n", + get_cpu(), __get_cpu_var(ext2_wr_wa), (char *)__get_cpu_var(ext2_wr_wa) + wsize, wsize); +#ifdef EXT2_COMPR_REPORT_WA + printk (KERN_INFO "EXT2-fs: wr_wa=%p--%p (%d)\n", + ext2_wr_wa, (char *)ext2_wr_wa + wsize, wsize); +#endif + put_cpu(); + } + } +} + + --- linux-3.2-rc5/fs/ext2/e2zlib.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/e2zlib.c 2011-12-13 14:22:47.841975843 +0100 @@ -0,0 +1,74 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(struct crypto_comp *, tfm) = NULL; + +size_t ext2_iZLIB(int action) +{ + /*mw: we init tfm when we need it...*/ + return 0; +} + + +size_t ext2_wZLIB(__u8 * ibuf, __u8 * obuf, void *heap, + size_t ilen, size_t olen, int level) +{ + int ret, dlen; + + if (!try_module_get(THIS_MODULE)) + return 0; + + /*check if we already have a tfm*/ + get_cpu_var(tfm); + if (__get_cpu_var(tfm) == NULL){ + __get_cpu_var(tfm) = crypto_alloc_comp("deflate", 0, CRYPTO_ALG_ASYNC); + } + assert(__get_cpu_var(tfm) != NULL); + + dlen = olen; + ret = crypto_comp_compress(__get_cpu_var(tfm) , ibuf, ilen, obuf, &dlen); + + put_cpu_var(tfm); + + if (ret) { + //printk(KERN_DEBUG "ext2_wZLIB: crypto_comp_compress failed: %d, ilen: %d, olen: %d\n", ret, ilen, olen); + return 0; + } + return dlen; +} + + +size_t ext2_rZLIB(__u8 * ibuf, __u8 * obuf, void *heap, + size_t ilen, size_t olen, int ignored) +{ + int ret, dlen; + + if (!try_module_get(THIS_MODULE)) + return 0; + + /*check if we already have a tfm*/ + get_cpu_var(tfm); + if (__get_cpu_var(tfm) == NULL){ + __get_cpu_var(tfm) = crypto_alloc_comp("deflate", 0, CRYPTO_ALG_ASYNC); + } + assert(__get_cpu_var(tfm) != NULL); + + dlen = olen; + ret = crypto_comp_decompress(__get_cpu_var(tfm), ibuf, ilen, obuf, &dlen); + + put_cpu_var(tfm); + + if (ret) { + //printk(KERN_DEBUG "ext2_wZLIB: crypto_comp_decompress failed: %d, ilen: %d, olen: %d\n", ret, ilen, olen); + return 0; + } + + return dlen; +} --- linux-3.2-rc5/fs/ext2/adler32.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/adler32.c 2011-12-13 14:22:47.841975844 +0100 @@ -0,0 +1,43 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: e2compr2.6.25.patch,v 1.1.2.1 2008/04/17 09:49:32 winkler Exp $ */ + +#define BASE 65521L /* largest prime 
smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* ========================================================================= */ +unsigned long ext2_adler32(unsigned long adler, const unsigned char *buf, unsigned int len) +{ + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int k; + + if (buf == 0) return 1L; + + while (len > 0) { + k = len < NMAX ? len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + buf += 16; + k -= 16; + } + if (k != 0) do { + s1 += *buf++; + s2 += s1; + } while (--k); + s1 %= BASE; + s2 %= BASE; + } + return (s2 << 16) | s1; +} --- linux-3.2-rc5/fs/ext2/super.c 2011-12-10 00:09:32.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/super.c 2011-12-13 14:22:47.843975906 +0100 @@ -32,7 +32,12 @@ #include #include #include +#ifdef CONFIG_EXT2_COMPRESS +#include +#include +#else #include "ext2.h" +#endif #include "xattr.h" #include "acl.h" #include "xip.h" @@ -393,7 +398,11 @@ enum { Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, - Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, + Opt_acl, Opt_noacl, +#ifdef CONFIG_EXT2_COMPRESS + Opt_force_compat, +#endif + Opt_xip, Opt_ignore, Opt_err, Opt_quota, Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation }; @@ -426,6 +435,9 @@ static const match_table_t tokens = { {Opt_ignore, "noquota"}, {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, +#ifdef CONFIG_EXT2_COMPRESS + {Opt_force_compat, "force-compat"}, +#endif {Opt_reservation, "reservation"}, {Opt_noreservation, "noreservation"}, {Opt_err, NULL} @@ -569,6 +581,11 @@ static int parse_options(char *options, clear_opt(sbi->s_mount_opt, RESERVATION); ext2_msg(sb, KERN_INFO, "reservations OFF"); break; +#ifdef CONFIG_EXT2_COMPRESS + case Opt_force_compat: + set_opt(sbi->s_mount_opt, FORCE_COMPAT); + break; +#endif case Opt_ignore: break; default: @@ -585,6 +602,10 @@ static int ext2_setup_super (struct supe int res = 0; struct ext2_sb_info *sbi = EXT2_SB(sb); +#ifdef CONFIG_EXT2_COMPRESS + printk (KERN_INFO E2COMPR_VERSION "\n"); +#endif + if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) { ext2_msg(sb, KERN_ERR, "error: revision level too high, " @@ -876,6 +897,65 @@ static int ext2_fill_super(struct super_ le32_to_cpu(features)); goto failed_mount; } +#ifdef CONFIG_EXT2_COMPRESS + /* Check that required algorithms are available. */ + /* todo: Provide a mount option to override this. */ + /* + * Philosophical bug: we assume that an algorithm's + * module is available if and only if this kernel was + * compiled with that algorithm as a module. This is + * untrue, but it is unclear what the right thing to + * do is. + */ + j = 0; /* error flag */ + if ((es->s_feature_incompat + & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)) + && (es->s_algorithm_usage_bitmap + & ~cpu_to_le32(EXT2_ALGORITHMS_SUPP))) { + /* + * The filesystem employs an algorithm not + * supported by this filesystem. Issue warning or + * error. 
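In short: bit i of s_algorithm_usage_bitmap records that algorithm id i is used somewhere on this filesystem; any bit outside EXT2_ALGORITHMS_SUPP therefore denotes clusters this kernel cannot decompress, and the mount is refused unless the patch's force-compat mount option was given, in which case the loop below only warns.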
+ */ + for (i = 0; i < 32; i++) { + if (!(es->s_algorithm_usage_bitmap + & cpu_to_le32(1 << i)) + || ((EXT2_ALGORITHMS_SUPP + & (1 << i)))) + continue; + /* + * TODO: Can't this message be moved outside + * of the for loop? + */ + if (!j) { + if (test_opt(sb, FORCE_COMPAT)) + printk(KERN_WARNING + "EXT2-fs: %s: " + "uses unsupported " + "compression algorithms", + sb->s_id); + else + printk("EXT2-fs: %s: couldn't mount " + "because of unsupported " + "compression algorithms", + sb->s_id); + j = 1; + } + if (i < EXT2_N_ALGORITHMS) + printk(" %s", ext2_algorithm_table[i].name); + else + printk(" %u", i); + } + } + if (j) { + if (test_opt(sb, FORCE_COMPAT)) + printk(" but ignoring as you request.\n"); + else { + printk(".\n"); + goto failed_mount; + } + } +#endif /* CONFIG_EXT2_COMPRESS */ if (!(sb->s_flags & MS_RDONLY) && (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of " --- linux-3.2-rc5/fs/ext2/ialloc.c 2011-12-10 00:09:32.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/ialloc.c 2011-12-13 14:22:47.845975968 +0100 @@ -470,6 +470,9 @@ struct inode *ext2_new_inode(struct inod brelse(bitmap_bh); bitmap_bh = read_inode_bitmap(sb, group); if (!bitmap_bh) { +#ifdef CONFIG_EXT2_COMPRESS + EXT2_I(inode)->i_flags &= ~EXT2_COMPR_FL; +#endif err = -EIO; goto fail; } @@ -558,6 +561,17 @@ got: memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_flags = ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED); +#ifdef CONFIG_EXT2_COMPRESS + /* + * The EXT2_COMPR flag is inherited from the parent + * directory as well as the cluster size and the compression + * algorithm. + */ + ei->i_log2_clu_nblocks = EXT2_I(dir)->i_log2_clu_nblocks; + ei->i_clu_nblocks = EXT2_I(dir)->i_clu_nblocks; + ei->i_compr_method = EXT2_I(dir)->i_compr_method; + ei->i_compr_flags = 0; +#endif ei->i_faddr = 0; ei->i_frag_no = 0; ei->i_frag_size = 0; --- linux-3.2-rc5/fs/ext2/balloc.c 2011-12-10 00:09:32.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/balloc.c 2011-12-13 14:22:47.847976031 +0100 @@ -11,8 +11,13 @@ * David S. Miller (davem@caip.rutgers.edu), 1995 */ +#ifdef CONFIG_EXT2_COMPRESS +#include +#include +#else #include "ext2.h" #include +#endif #include #include #include @@ -499,6 +504,13 @@ void ext2_free_blocks (struct inode * in struct ext2_super_block * es = sbi->s_es; unsigned freed = 0, group_freed; + +#ifdef CONFIG_EXT2_COMPRESS + assert((block != EXT2_COMPRESSED_BLKADDR) + || !S_ISREG(inode->i_mode) + || !(EXT2_SB(sb)->s_es->s_feature_incompat + & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION))); +#endif if (block < le32_to_cpu(es->s_first_data_block) || block + count < block || block + count > le32_to_cpu(es->s_blocks_count)) { --- linux-3.2-rc5/fs/ext2/inode.c 2011-12-10 00:09:32.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/inode.c 2011-12-13 14:22:47.852976189 +0100 @@ -32,7 +32,14 @@ #include #include #include +#ifdef CONFIG_EXT2_COMPRESS +#include +#include +#include +#include +#else #include "ext2.h" +#endif #include "acl.h" #include "xip.h" @@ -40,6 +47,34 @@ MODULE_AUTHOR("Remy Card and others"); MODULE_DESCRIPTION("Second Extended Filesystem"); MODULE_LICENSE("GPL"); +#ifdef CONFIG_EXT2_COMPRESS +/* mw: this function counts all references + * to this inode. this is necessary to + * refuse un/compression if the file has + * more than one refernce, I guess. 
*/ +int ext2_get_dcount(struct inode *inode) +{ + struct dentry *dentry; + struct list_head *head, *next, *tmp; + int count; + + head = &inode->i_dentry; + next = inode->i_dentry.next; + count = 0; + while (next != head) { + dentry = list_entry(next, struct dentry, d_alias); + tmp = next; + next = tmp->next; + spin_lock(&dentry->d_lock); + count += dentry->d_count; + spin_unlock(&dentry->d_lock); + //mw: similar to fs/dcache.c + } + + return count; +} +#endif + static int __ext2_write_inode(struct inode *inode, int do_sync); /* @@ -54,7 +89,9 @@ static inline int ext2_inode_is_fast_sym inode->i_blocks - ea_blocks == 0); } +#ifndef CONFIG_EXT2_COMPRESS static void ext2_truncate_blocks(struct inode *inode, loff_t offset); +#endif static void ext2_write_failed(struct address_space *mapping, loff_t to) { @@ -240,7 +277,11 @@ static Indirect *ext2_get_branch(struct *err = 0; /* i_data is not going away, no lock needed */ add_chain (chain, NULL, EXT2_I(inode)->i_data + *offsets); +#ifdef CONFIG_EXT2_COMPRESS + if (HOLE_BLKADDR(p->key)) +#else if (!p->key) +#endif goto no_block; while (--depth) { bh = sb_bread(sb, le32_to_cpu(p->key)); @@ -251,7 +292,11 @@ static Indirect *ext2_get_branch(struct goto changed; add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); read_unlock(&EXT2_I(inode)->i_meta_lock); +#ifdef CONFIG_EXT2_COMPRESS + if (HOLE_BLKADDR(p->key)) +#else if (!p->key) +#endif goto no_block; } return NULL; @@ -297,7 +342,11 @@ static ext2_fsblk_t ext2_find_near(struc /* Try to find previous block */ for (p = ind->p - 1; p >= start; p--) +#ifdef CONFIG_EXT2_COMPRESS + if (!HOLE_BLKADDR(*p)) +#else if (*p) +#endif return le32_to_cpu(*p); /* No such thing, so let's try location of indirect block */ @@ -498,7 +547,13 @@ static int ext2_alloc_branch(struct inod */ bh = sb_getblk(inode->i_sb, new_blocks[n-1]); branch[n].bh = bh; +#ifndef CONFIG_EXT2_COMPRESS lock_buffer(bh); +#else + CHECK_NOT_ATOMIC + if (!buffer_uptodate(bh)) + wait_on_buffer(bh); +#endif memset(bh->b_data, 0, blocksize); branch[n].p = (__le32 *) bh->b_data + offsets[n]; branch[n].key = cpu_to_le32(new_blocks[n]); @@ -514,7 +569,9 @@ static int ext2_alloc_branch(struct inod *(branch[n].p + i) = cpu_to_le32(++current_block); } set_buffer_uptodate(bh); +#ifndef CONFIG_EXT2_COMPRESS unlock_buffer(bh); +#endif mark_buffer_dirty_inode(bh, inode); /* We used to sync bh here if IS_SYNC(inode). * But we now rely upon generic_write_sync() @@ -675,6 +732,7 @@ static int ext2_get_blocks(struct inode if (err == -EAGAIN || !verify_chain(chain, partial)) { while (partial > chain) { brelse(partial->bh); +// bforget(partial->bh); /*mw: e2c-pre-2.6.30.4 used bforget here*/ partial--; } partial = ext2_get_branch(inode, depth, offsets, chain, &err); @@ -766,21 +824,608 @@ int ext2_fiemap(struct inode *inode, str ext2_get_block); } +#ifdef CONFIG_EXT2_COMPRESS +/* + * Readpage method that will take care of decompression. + */ +/* effic: I (pjm) think tht at present, reading a 32KB cluster 4KB at + a time does `decompress 4KB' for the first 4KB, then `decompress + 8KB' for the second, and so on. See if we can provide the page + cache with all the pages in a cluster. The problem is, we don't + want to erase anything tht hasn't been written to disk, so we can't + just call update_vm_cache(). The plan at present is to remember + what the contents of ext2_rd_wa.u come from, and don't bother + decompressing anything if the working area already contains the + right data. 
However, this is only a win where adjacent calls to
   ext2_decompress_blocks() request the same cluster.  We could force
   that by copying some code from generic_file_read() (but check for
   deadlocks before doing anything like that), but instead I'm taking
   the more passive approach of hoping for the best. */
+static int ext2_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct page *pg[EXT2_MAX_CLUSTER_PAGES], *epg[EXT2_MAX_CLUSTER_PAGES];
+	u32 cluster0, max_cluster;
+	int i, blockOfCluster, blocksToDo, npg;
+	const int inc = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
+	struct ext2_inode_info *ei = EXT2_I(page->mapping->host);
+#ifdef CONFIG_HIGHMEM
+	int kmapped = 0;	//mw
+#endif
+
+	int iClusterCnt;
+
+	/* For directories, fall out through the default routine */
+	if (S_ISDIR(inode->i_mode))
+	{
+		int rc;
+
+		rc = block_read_full_page(page, ext2_get_block);
+		assert(!rc);
+		return rc;
+	}
+
+	/* The semaphore prevents us trying to compress and decompress
+	   the cluster at the same time, or compress a cluster in the
+	   middle of reading it (thinking it to be uncompressed).
+
+	   You may not like the fact that we hold the semaphore across
+	   readpage (given that it isn't held without e2compr compiled
+	   in), but it does guarantee that we won't compress the
+	   cluster during readpage.  (OTOH, it's unlikely, if not
+	   impossible, for someone to `compress a cluster and rewrite
+	   the blocks' before the readpage completes.) */
+	/* This procedure used to have `#ifndef EXT2_LOCK_BUFFERS'
+	   around all the semaphore stuff, and unlocked each buffer
+	   before brelsing them ifdef EXT2_LOCK_BUFFERS.  I (pjm,
+	   1998-01-20) have removed that because (a) EXT2_LOCK_BUFFERS
+	   isn't #defined anywhere, and doesn't appear outside of this
+	   function, and (b) I haven't looked at what effect locking
+	   the buffers has.  You may like to reintroduce the idea of
+	   buffer locking to this function if you're more familiar
+	   with buffer locking than I, and believe that the full i_sem
+	   isn't necessary to protect from races (people seeing raw
+	   compressed data) between readpage and ext2_file_write(),
+	   ext2_compress_cluster() and ext2_truncate(). */
+	unlock_page(page);
+	mutex_lock(&inode->i_mutex);
+
+	assert (atomic_read(&inode->i_mutex.count) <= 0);	/* i.e. mutex_lock */
+
+	//mw: added EXT2_COMPR_FL, because EXT2_COMPRBLK_FL might change without the mutex held!
+	if ( !(ei->i_flags & (EXT2_COMPRBLK_FL|EXT2_COMPR_FL))
+	     || (ei->i_flags & EXT2_NOCOMPR_FL) )
+	{
+		goto readpage_uncompressed;
+	}
+
+	{
+		register u32 blockOfFile
+			= (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits;
+
+		blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
+		cluster0 = ext2_block_to_cluster(inode, blockOfFile);
+		max_cluster = ext2_block_to_cluster
+			(inode, blockOfFile + blocksToDo - 1);
+		blockOfCluster
+			= blockOfFile - ext2_cluster_block0(inode, cluster0);
+	}
+
+	/* return -???, any idea which code?  do_generic_file_read() cares,
+	   ext2_readpages() doesn't.  Maybe I should look at the "generic"
+	   readpage() and see what it returns in this case. */
+
+	/* Check if any part of the requested area contains part of a
+	   compressed cluster.  If not, we can use the default
+	   ext2_readpage().
+
+	   (Note that we don't have to worry about a cluster becoming
+	   compressed in the meantime, because we have the semaphore.)
+
+	   A page can cover up to 9 clusters.  (The maximum can only
+	   occur with 32KB pages, 4KB clusters, and a non-page-aligned
+	   offset.
Thanks go to Kurt Fitzner for reporting that + page offsets needn't be aligned; see generic_file_mmap().) */ + { + int isCmp[(PAGE_SIZE >> 12) + 1]; + u8 *dst; + unsigned clu_ix; + + assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp)); + for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) { + isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix); + if (isCmp[clu_ix] < 0){ + printk("IO-ERROR: isCmp\n"); + goto io_error; + } + } + + for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) + if (isCmp[clu_ix] > 0) + goto readpage_compressed; + /* fall through */ + readpage_uncompressed: + { + int rc=0; + lock_page(page); + + /* Did somebody else fill it already? */ + if (PageUptodate(page) ){ //mw: necessary for DEBUG! anyway checked in do_generic_mapping_read + unlock_page(page); + } + else { + //try_to_free_buffers(page); + rc = block_read_full_page(page,ext2_get_block); + } + mutex_unlock(&inode->i_mutex); + assert(!rc); + return rc; + } + + readpage_compressed: + + /* Copied from block_read_full_page */ + /* if (!PageLocked(page)) */ + /* PAGE_BUG(page); */ + lock_page(page); + if (PageUptodate(page)) { + unlock_page(page); + mutex_unlock(&inode->i_mutex); + return(0); + } + get_page(page); + + ClearPageUptodate(page); + ClearPageError(page); + + dst = (u8 *) page_address(page); + for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) { + struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS]; + int nbh, blocksThisClu; + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + pg[i] = NULL; + epg[i] = NULL; + } + + /* clear_bit(PG_locked, &page->flags); */ + npg = ext2_cluster_npages(inode, cluster0 + clu_ix); + nbh = ext2_get_cluster_pages(inode, cluster0 + clu_ix, pg, page, 0); + + if (nbh <= 0) { + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) + printk("no pages\n"); + goto out; + } + iClusterCnt = ext2_cluster_npages(inode, cluster0); + + nbh = ext2_get_cluster_extra_pages(inode, cluster0 + clu_ix, pg, epg); + if (nbh <= 0) + { + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) + epg[i] = NULL; + printk("no extra pages\n"); + goto out; + } + assert (iClusterCnt = ext2_cluster_npages(inode, cluster0)); + +#ifdef CONFIG_HIGHMEM + ext2_kmap_cluster_pages(page, pg, epg); + kmapped = 1; +#endif + + nbh = ext2_get_cluster_blocks(inode, cluster0 + clu_ix, bh, pg, epg, 0); + if (nbh <= 0) + { + printk("no blocks\n"); + goto out; + } + + /* How many blocks (including holes) we need from this cluster. */ + { + blocksThisClu = (ext2_cluster_nblocks(inode, cluster0 + + clu_ix) - blockOfCluster); + if (blocksThisClu > blocksToDo) + blocksThisClu = blocksToDo; + } + + if (isCmp[clu_ix]) { + u8 const *src; + int n, nbytes_wanted; + struct ext2_cluster_head *head; + unsigned meth; +# ifdef CONFIG_KMOD + unsigned alg; +# endif + + bh[0]->b_data = page_address(bh[0]->b_page); + head = (struct ext2_cluster_head *) bh[0]->b_data; + + /* jmr 1998-10-28 Hope this is the last time I'm moving this code. + * Module loading must be done _before_ we lock wa, just think what + * can happen if we reallocate wa when somebody else uses it... + */ + meth = head->method; /* only a byte, so no swabbing needed. 
*/ + if (meth >= EXT2_N_METHODS) { + printk("illegal method id\n"); + ext2_msg(inode->i_sb, + "illegal method id", + "inode = %lu, id = %u", + inode->i_ino, meth); + goto out; + } +# ifdef CONFIG_KMOD + alg = ext2_method_table[meth].alg; + if (!ext2_algorithm_table[alg].avail) { + char str[32]; + + sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name); + request_module(str); + } +# endif /* CONFIG_KMOD */ + + /* Calculate nbytes_wanted. */ + { + unsigned nblk_wanted, i; + + /* We want to decompress the whole cluster */ + //nblk_wanted = ext2_cluster_nblocks(inode, cluster0 + clu_ix); + nblk_wanted = npg << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); /*mw: FIXED */ + + for (i = nblk_wanted; i != 0;) + if (((--i >> 3) < head->holemap_nbytes) + && (head->holemap[i >> 3] & (1 << (i & 7)))) + --nblk_wanted; + nbytes_wanted = (nblk_wanted + << inode->i_sb->s_blocksize_bits); + } + + /* Decompress. */ + get_cpu_var(ext2_rd_wa); + if (__get_cpu_var(ext2_rd_wa) == NULL) + { + ext2_alloc_rd_wa(); + } + assert(__get_cpu_var(ext2_rd_wa) != NULL); + + n = ext2_decompress_blocks(inode, bh, nbh, nbytes_wanted, cluster0 + clu_ix); + if (n < 0) { + assert(nbh >= 0); + printk("ext2_readpage: noblocks decompressed\n"); + put_cpu_var(ext2_rd_wa); + goto out; + } + +# ifdef EXT2_COMPR_REPORT_VERBOSE_INODE + if (ei->i_flags & EXT2_COMPR_FL) + printk(KERN_DEBUG "ext2: mmap %04x:%lu: blocksToDo=%d, blockOfCluster=%d, blocksThisClu=%d, clu_nblocks=%d\n", + inode->i_rdev, + inode->i_ino, + blocksToDo, + blockOfCluster, + blocksThisClu, + ext2_cluster_nblocks(inode, cluster0 + clu_ix)); +# endif + + /* */ + { + unsigned i; + int ipg; + + i = ext2_cluster_nblocks(inode, cluster0 + clu_ix) - 1; + //i = (npg << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - 1; /*mw: FIXED!!! 
(here: shift = 2Bit) */ + //if(i+1 != ext2_cluster_nblocks(inode, cluster0 + clu_ix)) + //printk("npg=%i, nbh=%i, npgf=%i, nbhf =%i, cluster:%i, dec_blk:%i, b_wanted:%i, size:%i\n ", ext2_cluster_npages(inode, cluster0 + clu_ix), ext2_cluster_nblocks(inode, cluster0 + clu_ix), npgtest, i+1, cluster0 + clu_ix, n, nbytes_wanted, inode->i_size); + blockOfCluster = 0; + assert(n > 0); + src = __get_cpu_var(ext2_rd_wa)->u + nbytes_wanted - inode->i_sb->s_blocksize; +#ifdef EXT2_COMPR_REPORT + trace_e2c("ext2_readpage: copy data inc=%d blocksThisClu=%d, n=%d\n", inc, blocksThisClu, n); +#endif + for (ipg = npg - 1; ipg >= 0; ipg--) { + if (pg[ipg] == NULL) { + i -= inc; + src -= PAGE_SIZE; + continue; + } + if (((inode->i_size-1) >> PAGE_SHIFT) == pg[ipg]->index) { + n = ((inode->i_size-1) & (PAGE_SIZE -1)) >> inode->i_sb->s_blocksize_bits; + i -= ((blocksThisClu-1) - n); + src -= ((blocksThisClu-1) - n) << inode->i_sb->s_blocksize_bits; + } else { + n = blocksThisClu - 1; + } + if (PageUptodate(pg[ipg]) ) { + for (;n >= 0;n--, i--) { + if (((i >> 3) >= head->holemap_nbytes) + || !(head->holemap[i >> 3] & (1 << (i & 7)))) { + src -= inode->i_sb->s_blocksize; + } + } + } else { + + dst = (u8 *) page_address(pg[ipg]) + (n << inode->i_sb->s_blocksize_bits); + + for (; + n >= 0; + n--, i--, dst -= inode->i_sb->s_blocksize) { + assert(!buffer_dirty(bh[i])); + clear_buffer_dirty(bh[i]); //mw: had a refile_buffer in 2.4 + if (((i >> 3) >= head->holemap_nbytes) + || !(head->holemap[i >> 3] & (1 << (i & 7)))) { + assert(i >= 0); + memcpy(dst, src, inode->i_sb->s_blocksize); + src -= inode->i_sb->s_blocksize; + } else { + assert(i >= 0); + memset (dst, 0, inode->i_sb->s_blocksize); + } + //clear_bit(BH_Uptodate, &bh[i]->b_state); + } + SetPageUptodate(pg[ipg]); + } + } + } + put_cpu_var(ext2_rd_wa); + } else { + /* Uncompressed cluster. Just copy the data. 
*/ + int n; + +# ifdef EXT2_COMPR_REPORT_VERBOSE_INODE + if (ei->i_flags & EXT2_COMPR_FL) + printk(KERN_DEBUG + "ext2: mmap %lu: blocksToDo = %d, " + "blockOfCluster = %d, clu_nblocks = %d\n", + inode->i_ino, blocksToDo, blockOfCluster, + ext2_cluster_nblocks(inode, cluster0 + + clu_ix)); +# endif + + for (n = 0; + n < blocksThisClu; + n++, dst += inode->i_sb->s_blocksize) { + if ((blockOfCluster + n < nbh) + && (bh[blockOfCluster + n] != NULL)) + { + memcpy(dst, + bh[blockOfCluster + n]->b_data, + inode->i_sb->s_blocksize); + } + else + { + memset(dst, 0, inode->i_sb->s_blocksize); + } + } + blockOfCluster = 0; + } // end uncompressed Cluster + + blocksToDo -= blocksThisClu; + +#ifdef CONFIG_HIGHMEM + if (kmapped) + ext2_kunmap_cluster_pages(page, pg, epg); +#endif + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (epg[i] != NULL) { + + ClearPageDirty(epg[i]); + ClearPageUptodate(epg[i]); + try_to_free_buffers(epg[i]); + unlock_page(epg[i]); + assert(page_count(epg[i]) <= 1); + page_cache_release(epg[i]); + } + } + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (pg[i] == NULL) + break; + if (pg[i] == page) + continue; + unlock_page(pg[i]); + page_cache_release(pg[i]); + } + //mw + assert (isCmp[clu_ix] == ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix)); + } // end for-loop: Cluster + } + + SetPageUptodate(page); + unlock_page(page); + atomic_dec(&page->_count); + mutex_unlock(&inode->i_mutex); + return 0; + + out: + +#ifdef CONFIG_HIGHMEM + if (kmapped) + ext2_kunmap_cluster_pages(page, pg, epg); +#endif + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (epg[i] != NULL) { + + ClearPageDirty(epg[i]); + ClearPageUptodate(epg[i]); + try_to_free_buffers(epg[i]); + unlock_page(epg[i]); + assert(page_count(epg[i]) <= 1); + page_cache_release(epg[i]); + } + } + + for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { + if (pg[i] == NULL) + break; + if (pg[i] == page) + continue; + unlock_page(pg[i]); + page_cache_release(pg[i]); + } + mutex_unlock(&inode->i_mutex); + return 0; + + io_error: +#ifdef CONFIG_HIGHMEM + if (kmapped) + ext2_kunmap_cluster_pages(page, pg, epg); +#endif + SetPageError(page); + unlock_page(page); + atomic_dec(&page->_count); + mutex_unlock(&inode->i_mutex); + printk("Readpage: IOERROR\n"); + return -EIO; /* it is tested in do_generic_file_read(), ... */ +} +#endif /* CONFIG_EXT2_COMPRESS */ + static int ext2_writepage(struct page *page, struct writeback_control *wbc) { +/* mw (24/06/2008): + * WRITEPAGE: this code was also in e2compr 2.4 and once removed by yaboo ding. + * ext2_writepage() is also called for dirty pages. Usually we write using file_write() which + * wraps correctly to compressed files. BUT: a writeable memory map might + * produce dirty pages, which will be written back normally. this should/might fail. + * The following code should fix this bug, but this was not tested yet. + */ +#ifdef CONFIG_EXT2_COMPRESS +#undef USE_WRITEPAGE +//#define USE_WRITEPAGE +#ifdef USE_WRITEPAGE + + struct ext2_inode_info *ei = EXT2_I(page->mapping->host); + int retval; + + struct inode *inode = page->mapping->host; + u32 cluster0, max_cluster; + int blocksToDo; + + unlock_page(page); + //mw: do we need this ??? + //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { + /* trace_e2c("ext2_writepage: inode"); */ + mutex_lock(&inode->i_mutex); + /* trace_e2c(" down\n"); */ + //} + if (!(ei->i_flags & EXT2_COMPRBLK_FL) + || (ei->i_flags & EXT2_NOCOMPR_FL) ) + { + //mw: do we need this ??? 
+ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { + /* trace_e2c("ext2_writepage: inode up 1\n"); */ + mutex_unlock(&inode->i_mutex); + //} + lock_page(page); + return block_write_full_page(page, ext2_get_block, wbc); + } + /* */ + { + register u32 blockOfFile + = (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits; + + blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; + cluster0 = ext2_block_to_cluster(inode, blockOfFile); + max_cluster = ext2_block_to_cluster(inode, blockOfFile + blocksToDo - 1); + } + + /* Check if any part of the requested area contains part of a + compressed cluster. If not, we can use default ext2_writepage(). + + (Note that we don't have to worry about a cluster becoming + compressed in the meantime, because we have the semaphore.) + + A page can cover up to 9 clusters. (The maximum can only + occur with 32KB pages, 4KB clusters, and a non-page-aligned + offset. Thanks go to Kurt Fitzner for reporting that + page offsets needn't be aligned; see generic_file_mmap().) */ + + { + int isCmp[(PAGE_SIZE >> 12) + 1]; + unsigned clu_ix; + + assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp)); + for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) { + isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix); + if (isCmp[clu_ix] < 0) { + //mw: do we need this ???if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { + /* trace_e2c("ext2_writepage: inode up 2\n"); */ + lock_page(page); + mutex_unlock(&inode->i_mutex); + //} + return -EIO; + } + } + + for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) + if (isCmp[clu_ix] > 0) + ext2_decompress_cluster(inode, cluster0 + clu_ix); + + //mw: do we need this ??? + //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { + /* trace_e2c("ext2_writepage: inode up 3\n"); */ + mutex_unlock(&inode->i_mutex); + //} + lock_page(page); + + /* fall through */ + } +#endif /* CONFIG_EXT2_COMPRESS */ +#endif return block_write_full_page(page, ext2_get_block, wbc); } +#ifndef CONFIG_EXT2_COMPRESS static int ext2_readpage(struct file *file, struct page *page) { return mpage_readpage(page, ext2_get_block); } +#endif static int ext2_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { +#ifdef CONFIG_EXT2_COMPRESS +/* + * For now, just read each page into cache and don't worry about emitting BIOs. + * (whitpa 02 Aug 2004). 
+ */ + + unsigned page_idx; + struct pagevec lru_pvec; + int iError; + + pagevec_init(&lru_pvec, 0); + + for (page_idx = 0; page_idx < nr_pages; page_idx++) { + struct page *page = list_entry(pages->prev, struct page, lru); + + prefetchw(&page->flags); + list_del(&page->lru); + + iError = add_to_page_cache(page, mapping, page->index, GFP_KERNEL); + if (!iError) { + if (!PageUptodate(page)) + { + (void) ext2_readpage(file, page); + } + else + { + unlock_page(page); + } + if (!pagevec_add(&lru_pvec, page)) + __pagevec_lru_add_file(&lru_pvec); + } else { + page_cache_release(page); + } + + } + pagevec_lru_add_file(&lru_pvec); + BUG_ON(!list_empty(pages)); + return 0; +#else return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); +#endif } static int @@ -829,11 +1474,58 @@ static int ext2_nobh_writepage(struct pa return nobh_writepage(page, ext2_get_block, wbc); } +#ifdef CONFIG_EXT2_COMPRESS +static sector_t ext2_do_bmap(struct address_space *mapping, sector_t block) +#else static sector_t ext2_bmap(struct address_space *mapping, sector_t block) +#endif { return generic_block_bmap(mapping,block,ext2_get_block); } +#ifdef CONFIG_EXT2_COMPRESS +/* Return 0 instead of EXT2_COMPRESSED_BLKADDR if EXT2_NOCOMPR_FL + * high. This is necessary for us to be able to use + * generic_readpage() when EXT2_NOCOMPR_FL is high. + */ +static sector_t ext2_bmap(struct address_space *mapping, sector_t block) +{ + sector_t result; + struct inode *inode = mapping->host; + + if ((EXT2_I(inode)->i_flags & (EXT2_COMPRBLK_FL | EXT2_NOCOMPR_FL)) + == (EXT2_COMPRBLK_FL | 0)) { + int err; + + err = ext2_cluster_is_compressed_fn + (inode, ext2_block_to_cluster(inode, block)); + if (err > 0) + ext2_msg (inode->i_sb, "ext2_bmap", + "compressed cluster, inode %lu", + inode->i_ino); + if (err != 0) + return 0; + } + + result = ext2_do_bmap(mapping, block); + if (result != EXT2_COMPRESSED_BLKADDR) + return result; + + if (!(EXT2_SB(inode->i_sb)->s_es->s_feature_incompat + & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION))) + ext2_error(inode->i_sb, "ext2_bmap", + "compressed_blkaddr (ino %lu, blk %lu) " + "on non-compressed fs", + inode->i_ino, (unsigned long) block); + if (!S_ISREG(inode->i_mode)) + ext2_error(inode->i_sb, "ext2_bmap", + "compressed_blkaddr for non-regular file " + "(ino %lu, blk %lu)", + inode->i_ino, (unsigned long) block); + return 0; +} +#endif /* CONFIG_EXT2_COMPRESS */ + static ssize_t ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -853,6 +1545,18 @@ ext2_direct_IO(int rw, struct kiocb *ioc static int ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) { +#ifdef CONFIG_EXT2_COMPRESS +#ifdef USE_WRITEPAGE + struct ext2_inode_info *ei = EXT2_I(mapping->host); + if ( (ei->i_flags & EXT2_COMPRBLK_FL) + && !(ei->i_flags & EXT2_NOCOMPR_FL)) + { + //NULL will invoke ext2_writepage for writeback, hopefully. 
+ return mpage_writepages(mapping, wbc, NULL); + } + else +#endif +#endif return mpage_writepages(mapping, wbc, ext2_get_block); } @@ -1001,6 +1705,12 @@ static inline void ext2_free_data(struct for ( ; p < q ; p++) { nr = le32_to_cpu(*p); +#ifdef CONFIG_EXT2_COMPRESS + if (nr == EXT2_COMPRESSED_BLKADDR) { + *p = 0; + continue; + } +#endif if (nr) { *p = 0; /* accumulate blocks to free if they're contiguous */ @@ -1045,6 +1755,12 @@ static void ext2_free_branches(struct in nr = le32_to_cpu(*p); if (!nr) continue; +#ifdef CONFIG_EXT2_COMPRESS + if (nr == EXT2_COMPRESSED_BLKADDR) { + *p = 0; + continue; + } +#endif *p = 0; bh = sb_bread(inode->i_sb, nr); /* @@ -1069,6 +1785,96 @@ static void ext2_free_branches(struct in ext2_free_data(inode, p, q); } +/* pjm 1998-01-14: As far as I can tell, "I don't do any locking" is + no longer correct, as i_sem is downed for all write() and + truncate() stuff except where it doesn't matter (e.g. new inode). */ + +#ifdef CONFIG_EXT2_COMPRESS +/* If the EXT2_ECOMPR_FL bit is high, then things can go rather badly. + This can only happen if access permission was obtained before the + flag was raised. Also, it shouldn't be too much of a problem + unless the end point of truncation is a compressed cluster with a + compression error. */ + + /* From what I (Antoine) understand, the complexity of the truncate + code is due to the fact that we don't want to free blocks that + are still referenced. It does not ensure that concurrent read + operation will terminate properly, i.e., the semantic of reading + while somebody truncates is undefined (you can either get the old + data if you got the blocks before, or get plenty of zeros + otherwise). */ + +/* todo: Provide error trapping in readiness for when i_op->truncate + allows a return code. */ +static void fix_compression (struct inode * inode) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + /*if (atomic_read(&inode->i_mutex.count) > 0) + { + printk("Assert Mutex failed for file: %s \n", inode_name(inode, 0)); + dump_stack(); + }*/ + + assert (ei->i_flags & EXT2_COMPRBLK_FL); /* one or more compressed clusters */ + assert ((atomic_read(&inode->i_mutex.count) < 1) + || ((inode->i_nlink == 0) + && (atomic_read(&inode->i_count) == 0))); + /* pjm 1998-01-14: I think the below comment can safely be removed, as + it's impossible for someone to be compressing during truncate(), because + i_sem is down. */ + /* Dans le cas ou les clusters peuvent etre compresses, cela pose + un probleme : il faudrait stopper aussi si le cluster est + comprime et ne contient pas plus de donnees que i_size ne + permet. Sinon, on peut passer son temps a decompresser un + cluster que quelqu'un d'autre compresse en meme + temps... (TODO). Cela ne peut arriver que si on reverifie apres + coup si le cluster est non compresse (ce qu'on fait a l'heure + actuelle) => faire autrement. + + pjm fixme tr + + If the clusters can be compressed, we'd have a problem: we'd + also need to stop if the cluster is compressed and doesn't + contain more data than i_size permits. Otherwise we can spend + time decompressing a cluster that someone else is compressing + at the same time. (TODO.) This can only happen if we reverify + "apres coup" ("after the event"? "after each time"?) "si" ("if" + or "that") the cluster is not compressed (as we are currently + doing) => do differently. */ + + /* todo: Handle errors from ext2_cluster_is_compressed(). + (Except ext2_truncate() currently silently ignores errors + anyway.) 
*/ + + if (!ext2_offset_is_clu_boundary(inode, inode->i_size) + && (! ( ei->i_flags & EXT2_NOCOMPR_FL)) + && (ext2_cluster_is_compressed_fn + (inode, ext2_offset_to_cluster (inode, inode->i_size)) + > 0)) { + trace_e2c("fix_compression: inode:%lu decompress_cluster!\n", inode->i_ino); + ext2_decompress_cluster(inode, ext2_offset_to_cluster(inode, inode->i_size)); + /* todo: Check the return code of + ext2_decompress_cluster(). (Then again, I don't + know how to report an error anyway. + ext2_truncate() silently ignores errors.) */ + + /* Organise for the cluster to be recompressed later. */ + assert (ei->i_flags & EXT2_COMPR_FL); + + ei->i_flags |= EXT2_DIRTY_FL; + ei->i_compr_flags |= EXT2_CLEANUP_FL; + mark_inode_dirty(inode); + } else + /* If there are no more compressed clusters, then + remove the EXT2_COMPRBLK_FL. Not essential from a + safety point of view, but friendlier. We only do + this in the `else' because the cleanup function + will handle it in the `if' case. */ + ext2_update_comprblk(inode); +} +#endif + + static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) { __le32 *i_data = EXT2_I(inode)->i_data; @@ -1081,6 +1887,27 @@ static void __ext2_truncate_blocks(struc int n; long iblock; unsigned blocksize; + +#ifdef CONFIG_EXT2_COMPRESS + /* If the new size is in the middle of a compressed cluster, + then we decompress it, and set things up to be recompressed + later. + + todo: It isn't very nice to get ENOSPC on truncate. We + can't completely remove the possibility (unless the + compression algorithms obey the rule `shorter input never + gives longer output') but we could greatly reduce the + possibility, e.g. by moving the fix_compression() function + to compress.c, and have it decompress and immediately + recompress the cluster, without allocating blocks for the + full decompressed data. */ + if (EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL) { + trace_e2c("ext2_truncate: ino=%ld sz=%d\n", inode->i_ino, (int)inode->i_size); + fix_compression(inode); + truncate_inode_pages(inode->i_mapping, inode->i_size); + } +#endif + blocksize = inode->i_sb->s_blocksize; iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); @@ -1151,8 +1978,11 @@ do_indirects: mutex_unlock(&ei->truncate_mutex); } - +#ifdef CONFIG_EXT2_COMPRESS +void ext2_truncate_blocks(struct inode *inode, loff_t offset) +#else static void ext2_truncate_blocks(struct inode *inode, loff_t offset) +#endif { /* * XXX: it seems like a bug here that we don't allow @@ -1340,7 +2170,73 @@ struct inode *ext2_iget (struct super_bl goto bad_inode; } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); +#ifdef CONFIG_EXT2_COMPRESS + ei->i_flags = 0x807fffff & le32_to_cpu(raw_inode->i_flags); + ei->i_compr_flags = 0; + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) { + + if (S_ISDIR(inode->i_mode)) + { + //mw: + //mutex_lock(&inode->i_mutex); + if (S_ISDIR(inode->i_mode)) + { + ei->i_flags &= ~(EXT2_COMPRBLK_FL | EXT2_DIRTY_FL); //modify!!! + } + //mutex_unlock(&inode->i_mutex); + } + + /* The above shouldn't be necessary unless someone's + * been playing with EXT2_IOC_SETFLAGS on a non-e2compr + * kernel, or the inode has been scribbled on. 
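+	 *
+	 * For reference, the packing used by the decode below: bits
+	 * 26-30 of the raw i_flags word hold the compression method
+	 * and bits 23-25 hold log2 of the cluster size in blocks,
+	 * while the 0x807fffff mask above keeps only the ordinary
+	 * flag bits.  For example, ((raw >> 26) & 0x1f) == 1 with
+	 * ((raw >> 23) & 0x7) == 3 describes method 1 and a
+	 * 2^3 = 8-block cluster.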
+ */ + if (ei->i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL)) { + ei->i_compr_method + = (le32_to_cpu(raw_inode->i_flags) >> 26) & 0x1f; + ei->i_log2_clu_nblocks + = (le32_to_cpu(raw_inode->i_flags) >> 23) & 0x7; + if ((ei->i_log2_clu_nblocks < 2) + || (ei->i_log2_clu_nblocks > 5)) { + if ((ei->i_log2_clu_nblocks == 0) + && !(ei->i_flags & EXT2_COMPRBLK_FL)) { + /* The EXT2_COMPR_FL flag was + * raised under a kernel + * without e2compr support. + */ + if (S_ISREG(inode->i_mode)) + ei->i_flags |= EXT2_DIRTY_FL; + /* Todo: once we're sure the kernel can + * handle [log2_]clu_nblocks==0, get rid + * of the next statement. + */ + ei->i_log2_clu_nblocks + = EXT2_DEFAULT_LOG2_CLU_NBLOCKS; + } else { + ei->i_flags |= EXT2_ECOMPR_FL; + ext2_error(inode->i_sb, + "ext2_read_inode", + "inode %lu is corrupted: " + "log2_clu_nblocks=%u", + inode->i_ino, + ei->i_log2_clu_nblocks); + } + } + } else { + ei->i_compr_method = EXT2_DEFAULT_COMPR_METHOD; + ei->i_log2_clu_nblocks + = EXT2_DEFAULT_LOG2_CLU_NBLOCKS; + } + if (ei->i_log2_clu_nblocks > + (EXT2_LOG2_MAX_CLUSTER_BYTES - inode->i_sb->s_blocksize_bits)) + ei->i_log2_clu_nblocks = (EXT2_LOG2_MAX_CLUSTER_BYTES + - inode->i_sb->s_blocksize_bits); + ei->i_clu_nblocks = 1 << ei->i_log2_clu_nblocks; + if (ei->i_flags & EXT2_DIRTY_FL) + ei->i_compr_flags = EXT2_CLEANUP_FL; + } +#else /* !CONFIG_EXT2_COMPRESS */ ei->i_flags = le32_to_cpu(raw_inode->i_flags); +#endif ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); ei->i_frag_no = raw_inode->i_frag; ei->i_frag_size = raw_inode->i_fsize; @@ -1463,7 +2359,35 @@ static int __ext2_write_inode(struct ino raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); +#ifdef CONFIG_EXT2_COMPRESS + if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) + && (ei->i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL))) { + if ((ei->i_log2_clu_nblocks < 2) + || (ei->i_log2_clu_nblocks > 5)) { + ei->i_flags |= EXT2_ECOMPR_FL; + ext2_error (inode->i_sb, "ext2_write_inode", + "inode %lu is corrupted: log2_clu_nblocks=%u", + inode->i_ino, ei->i_log2_clu_nblocks); + } + assert (ei->i_clu_nblocks == (1 << ei->i_log2_clu_nblocks)); + assert (ei->i_compr_method < 0x20); + raw_inode->i_flags = cpu_to_le32 + ((ei->i_flags & 0x807fffff) + | (ei->i_compr_method << 26) + | (ei->i_log2_clu_nblocks << 23)); + } else + { + //mw: i_mutex was introduced and disabled again: deadlock with lilo + // mutex_lock(&inode->i_mutex); //mw + raw_inode->i_flags = cpu_to_le32 //modify !!! + (ei->i_flags + & 0x807fffff /* no compr meth/size */ + & ~(EXT2_COMPR_FL | EXT2_COMPRBLK_FL | EXT2_IMMUTABLE_FL | EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL)); + // mutex_unlock(&inode->i_mutex); //mw + } +#else raw_inode->i_flags = cpu_to_le32(ei->i_flags); +#endif raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); raw_inode->i_frag = ei->i_frag_no; raw_inode->i_fsize = ei->i_frag_size; --- linux-3.2-rc5/fs/ext2/file.c 2011-12-10 00:09:32.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/file.c 2011-12-13 14:22:47.853976220 +0100 @@ -18,10 +18,25 @@ * (jj@sunsite.ms.mff.cuni.cz) */ +#ifdef CONFIG_EXT2_COMPRESS +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#else #include #include #include #include "ext2.h" +#endif + + #include "xattr.h" #include "acl.h" @@ -30,8 +45,39 @@ * for a single struct file are closed. Note that different open() calls * for the same file yield different struct file structures. 
 */
+
+/*
+ * pjm 1998-01-09: I would note that this is different from `when no
+ * process has the inode open'.
+ */
 static int ext2_release_file (struct inode * inode, struct file * filp)
 {
+#ifdef CONFIG_EXT2_COMPRESS
+	/*
+	 * Now's as good a time as any to clean up wrt compression.
+	 * Previously (before 2.1.4x) we waited until
+	 * ext2_put_inode(), but now the dcache sometimes delays that
+	 * call until umount time.
+	 */
+	//printk(KERN_DEBUG "ext2_release_file: pid=%d, i_ino=%lu, i_count=%d\n", current->pid, inode->i_ino, atomic_read(&inode->i_count));
+
+	if (S_ISREG (inode->i_mode)
+	    && inode->i_nlink
+	    && (EXT2_I(inode)->i_compr_flags & EXT2_CLEANUP_FL)) {
+#ifdef EXT2_COMPR_REPORT_PUT
+		printk(KERN_DEBUG "ext2_release_file: pid=%d, i_ino=%lu, i_count=%d\n", current->pid, inode->i_ino, atomic_read(&inode->i_count));
+#endif
+		/*
+		 * todo: See how the return code of
+		 * ext2_release_file() is used, and decide whether it
+		 * might be appropriate to pass any errors to the
+		 * caller.
+		 */
+		//dump_stack();
+		(void) ext2_cleanup_compressed_inode (inode);
+	}
+
+#endif
 	if (filp->f_mode & FMODE_WRITE) {
 		mutex_lock(&EXT2_I(inode)->truncate_mutex);
 		ext2_discard_reservation(inode);
@@ -56,6 +102,456 @@ int ext2_fsync(struct file *file, loff_t
 	return ret;
 }
 
+#ifdef CONFIG_EXT2_COMPRESS
+struct page_cluster {
+	struct page * page;
+	loff_t pos;
+	unsigned bytes;
+	unsigned long offset;
+	unsigned char in_range;
+	const char * buf;
+};
+
+#define PAGE_IN_RANGE	1
+#define PAGE_KMAPPED	2
+
+
+/**
+ * ex_generic_osync_inode - flush all dirty data for a given inode to disk
+ * @inode: inode to write
+ * @mapping: the address_space that should be flushed
+ *
+ * This can be called by file_write functions for files which have the
+ * O_SYNC flag set, to flush dirty writes to disk.
+ *
+ * The original generic_osync_inode() took a bitmask selecting which of
+ * the inode's dirty data (OSYNC_DATA), the buffers at
+ * i_mapping->private_list (OSYNC_METADATA) and the inode itself
+ * (OSYNC_INODE) to write and wait upon; this port always syncs all
+ * three.
+ */
+
+/* mw: see generic_osync_inode() in kernels < 2.6.30 for the original
+   method.  Basically we want all of it: OSYNC_DATA, OSYNC_METADATA
+   and OSYNC_INODE. */
+int ex_generic_osync_inode(struct inode *inode, struct address_space *mapping)	//, int what)
+{
+	int err = 0;
+	int need_write_inode_now = 0;
+	int err2;
+
+	err = filemap_fdatawrite(mapping);
+
+	err2 = sync_mapping_buffers(mapping);
+	if (!err)
+		err = err2;
+
+	err2 = filemap_fdatawait(mapping);
+	if (!err)
+		err = err2;
+
+	/* check if the inode itself is dirty */
+	spin_lock(&inode->i_lock);
+	if (inode->i_state & I_DIRTY)
+		need_write_inode_now = 1;
+	spin_unlock(&inode->i_lock);
+
+	if (need_write_inode_now) {
+		err2 = write_inode_now(inode, 1);
+		if (!err)
+			err = err2;
+	}
+	else
+		inode_sync_wait(inode);
+
+	return err;
+}
+
+
+/*
+ * Write to a file through the page cache.
+ *
+ * We currently put everything into the page cache prior to writing it.
+ * This is not a problem when writing full pages.  With partial pages,
+ * however, we first have to read the data into the cache, then
+ * dirty the page, and finally schedule it for writing.  Alternatively, we
+ * could write-through just the portion of data that would go into that
+ * page, but that would kill performance for applications that write data
+ * line by line, and it's prone to race conditions.
+ *
+ * Note that this routine doesn't try to keep track of dirty pages.  Each
+ * file system has to do this all by itself, unfortunately.
+ * okir@monad.swb.de + */ +ssize_t +ext2_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos) +{ + struct address_space *mapping = file->f_dentry->d_inode->i_mapping; + struct inode *inode = mapping->host; + unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur, written, last_index; /* last page index */ + loff_t pos; + long status; + int err; + unsigned bytes; + u32 comprblk_mask=0; + struct ext2_inode_info *ei = EXT2_I(inode); + + if (!(ei->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)) +#undef DUD //mw: I think this is a buggy bug-fix +#ifdef DUD + || (count < inode->i_sb->s_blocksize) +#endif + ) + { + return do_sync_write(file, buf, count, ppos); + } + + if ((ssize_t) count < 0) + return -EINVAL; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + +#ifdef EXT2_COMPR_REPORT_MUTEX + printk(KERN_DEBUG "EXT2_FILE_WRITE_LOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino ); +#endif + mutex_lock(&inode->i_mutex); + /* mw: down_read(&inode->i_alloc_sem); // as used by ocsf2 TLL 02/21/07 + was removed with kernel 3.1 */ + atomic_inc(&inode->i_dio_count); + + pos = *ppos; + err = -EINVAL; + if (pos < 0) + goto out; + + written = 0; + + /* FIXME: this is for backwards compatibility with 2.4 */ + if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) + { + pos = inode->i_size; + } + + /* + * Check whether we've reached the file size limit. + */ + err = -EFBIG; + + if (limit != RLIM_INFINITY) { + if (pos >= limit) { + send_sig(SIGXFSZ, current, 0); + goto out; + } + if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) { + /* send_sig(SIGXFSZ, current, 0); */ + count = limit - (u32)pos; + } + } + + /* + * LFS rule + */ + if ( pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { + if (pos >= MAX_NON_LFS) { + send_sig(SIGXFSZ, current, 0); + goto out; + } + if (count > MAX_NON_LFS - (u32)pos) { + /* send_sig(SIGXFSZ, current, 0); */ + count = MAX_NON_LFS - (u32)pos; + } + } + + /* + * Are we about to exceed the fs block limit ? + * + * If we have written data it becomes a short write + * If we have exceeded without writing data we send + * a signal and give them an EFBIG. + * + * Linus frestrict idea will clean these up nicely.. + */ + if (!S_ISBLK(inode->i_mode)) { + if (pos >= inode->i_sb->s_maxbytes) { + if (count || pos > inode->i_sb->s_maxbytes) { + send_sig(SIGXFSZ, current, 0); + err = -EFBIG; + goto out; + } + /* zero-length writes at ->s_maxbytes are OK */ + } + + if (pos + count > inode->i_sb->s_maxbytes) + count = inode->i_sb->s_maxbytes - pos; + } else { + if (bdev_read_only(inode->i_sb->s_bdev)) { + err = -EPERM; + goto out; + } + if (pos >= inode->i_size) { + if (count || pos > inode->i_size) { + err = -ENOSPC; + goto out; + } + } + + if (pos + count > inode->i_size) + { + count = inode->i_size - pos; + } + } + + err = 0; + if (count == 0) + goto out; + + status = 0; + + if (file->f_flags & O_DIRECT) + { + err = -EINVAL; + goto out; + } + /* + * We must still check for EXT2_ECOMPR_FL, as it may have been + * set after we got the write permission to this file. 
+	 */
+	if ((ei->i_flags & (EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL)) == (EXT2_ECOMPR_FL | 0))
+	{
+		err = -EXT2_ECOMPR;
+		goto out;
+	}
+
+	should_remove_suid(file->f_dentry);
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	mark_inode_dirty_sync(inode);
+
+	if ((pos+count) > inode->i_size)
+		last_index = (pos+count-1) >> PAGE_CACHE_SHIFT;
+	else
+		last_index = (inode->i_size-1) >> PAGE_CACHE_SHIFT;
+
+	comprblk_mask = ei->i_flags | ~EXT2_COMPRBLK_FL;
+
+	//mw: now do it cluster-wise
+	do {
+		//unsigned long index, offset, clusters_page_index0,
+		unsigned long index, nextClusterFirstByte, cluster_compressed=0;
+		u32 cluster=0;
+		status = -ENOMEM;	/* we'll assign it later anyway */
+
+#ifdef EXT2_COMPRESS_WHEN_CLU
+		ei->i_flags |= EXT2_COMPRBLK_FL;
+		assert( (file->f_flags & O_DIRECT) == 0);
+		assert(mapping_mapped(inode->i_mapping) == 0);
+#endif
+
+		index = pos >> PAGE_CACHE_SHIFT;	/*mw: page index (start)*/
+		cluster = ext2_page_to_cluster(inode, index);
+
+		/*
+		 * We decompress the cluster if needed, and write
+		 * the data as normal.  The cluster will be
+		 * compressed again when the inode is cleaned up.
+		 */
+		if ((comprblk_mask == ~(u32)0)
+		    && !(ei->i_flags & EXT2_NOCOMPR_FL)) {
+			/* suspicious case 2 (mw: was marked "AUFFÄLLIG 2") */
+			/* assert (block == pos >> inode->i_sb->s_blocksize_bits); */
+
+			cluster_compressed = ext2_cluster_is_compressed_fn(inode, cluster);
+			if (cluster_compressed < 0) {
+				if (! written)
+					written = cluster_compressed;
+				break;
+			}
+		}
+
+		if (cluster_compressed > 0) {
+			/* Here, decompression takes place */
+			cluster_compressed = ext2_decompress_cluster(inode, cluster);
+			if (cluster_compressed < 0) {
+				if (! written) {
+					written = cluster_compressed;
+				}
+				break;
+			}
+		}
+
+		nextClusterFirstByte = (ext2_cluster_page0(inode, cluster+1) * PAGE_CACHE_SIZE);
+		bytes = nextClusterFirstByte - pos;	/*mw: bytes to do in this cluster*/
+		if (bytes > count) {
+			bytes = count;	/*mw: if end of data*/
+		}
+
+#ifdef EXT2_COMPR_DEBUG
+		//assert we stay inside the cluster!
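+		/* A sketch of the invariant checked here, assuming 4 KB
+		 * pages, 1 KB blocks and 8-block (8 KB) clusters: a write
+		 * at pos = 10000 lies in cluster 1; the next cluster
+		 * starts at byte 2 * 8192 = 16384, so bytes was clamped
+		 * above to 16384 - 10000 = 6384, and endpos - 1 = 16383
+		 * still maps to cluster 1.  In other words, a single
+		 * do_sync_write() call never crosses a cluster boundary. */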
+		{
+			int endpos;
+			int endindex;
+			int endcluster;
+			unsigned long thisClusterFirstByte;
+			int relstart, relend, startblock, endblock;
+
+			thisClusterFirstByte = (ext2_cluster_page0(inode, cluster) * PAGE_CACHE_SIZE);
+
+			relstart = pos - thisClusterFirstByte;
+			relend = bytes + relstart;
+
+			startblock = relstart >> 10;
+			endblock = relend >> 10;
+
+			endpos = pos + bytes;
+			//printk("do_sync_write cluster %d: inode:%lu, \t start:%i(%i), end:%i(%i), \t ccount:%d \t tcount:%d\n", cluster, inode->i_ino, relstart, startblock, relend, endblock, (int)bytes, count);
+			endindex = (endpos-1) >> PAGE_CACHE_SHIFT;	/*mw: page index (end)*/
+			endcluster = ext2_page_to_cluster(inode, endindex);
+			assert(cluster == endcluster);
+		}
+#endif
+
+		//mw: must unlock here, do_sync_write() will acquire the mutex again
+		mutex_unlock(&inode->i_mutex);
+
+		//mw: this is pretty clever: we use the generic method now :-)
+		//printk("do_sync_write cluster %d, mapped:%i\n", cluster, mapping_mapped(inode->i_mapping));
+		//status = do_sync_write_nolock(file, buf, bytes, &pos);	//without locking mutex
+		status = do_sync_write(file, buf, bytes, &pos);	//with locking mutex
+		assert(status >= 0);
+
+		mutex_lock(&inode->i_mutex);
+
+		written += status;
+		count -= status;
+		buf += status;
+
+#ifdef EXT2_COMPRESS_WHEN_CLU
+		assert (ei->i_flags & EXT2_COMPRBLK_FL);
+		if ((ei->i_flags & EXT2_COMPR_FL)
+		    && (ext2_offset_is_clu_boundary(inode, pos)) ) {
+
+			if (mapping_mapped(inode->i_mapping) == 0 )
+			/*
+			 * Pierre Peiffer: For files mapped via mmap,
+			 * compression will occur when the file is released.
+			 * We must, in this case, keep the pages (possibly
+			 * mapped by a process) from being compressed under
+			 * them.
+			 */
+			{
+				int error;
+				assert(mapping_mapped(inode->i_mapping) == 0);
+				error = ext2_compress_cluster(inode, cluster);
+				/*if (ext2_cluster_is_compressed_fn(inode, cluster))
+				   ext2_decompress_cluster(inode, cluster);*/
+				assert(mapping_mapped(inode->i_mapping) == 0);
+				/*
+				 * Actually, raising write_error may be a
+				 * mistake.  For example,
+				 * ext2_cleanup_compressed_cluster() doesn't
+				 * usually return any errors to user.  todo:
+				 * Have a look at ext2_compress_cluster, and
+				 * check whether its errors are such that they
+				 * should be returned to user.  Some of them
+				 * will be, of course, but it might be
+				 * possible for it to return without
+				 * change.
+				 */
+				if (error > 0)
+					comprblk_mask = ~(u32)0;
+			} else {
+#ifdef EXT2_COMPR_REPORT
+				char bdn[BDEVNAME_SIZE];
+				bdevname(inode->i_sb->s_bdev, bdn);
+#endif
+
+				trace_e2c("ext2_file_write: (dev. %s): "
+					  "ino=%ld, cluster=%d: file mapped, does "
+					  "not compress cluster\n",
+					  bdn, inode->i_ino, cluster);
+				ei->i_flags |= EXT2_DIRTY_FL;
+				ei->i_compr_flags |= EXT2_CLEANUP_FL;
+			}
+		}
+#endif
+
+	} while (count);
+	*ppos = pos;
+
+	/*
+	 * For now, when the user asks for O_SYNC, we'll actually
+	 * provide O_DSYNC.
+	 */
+	if (status >= 0) {
+		if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
+			/*if (ei->i_compr_flags & EXT2_OSYNC_INODE) {
+			   osync_already = 1;
+			   } else {
+			   osync_already = 0;
+			   ei->i_compr_flags |= EXT2_OSYNC_INODE;
+			   }*/
+			/* Should the 2nd arg be inode->i_mapping? */
+			status = ex_generic_osync_inode(inode, file->f_mapping
+					/*, OSYNC_METADATA|OSYNC_DATA*/);
+			/*if (osync_already == 0) {
+			   ei->i_compr_flags &= ~EXT2_OSYNC_INODE;
+			   }*/
+		}
+	}
+
+	err = written ? written : status;
+
+# ifdef EXT2_COMPRESS_WHEN_CLU
+	//mw: ext2_compress_cluster() might remove EXT2_COMPRBLK_FL
+	//if the file does not compress at all.  This is no error: remove next line?
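+	//Recap of the comprblk_mask logic: the mask (set up before the
+	//loop) is all-ones only if EXT2_COMPRBLK_FL was already set on
+	//entry, or if ext2_compress_cluster() above reported success;
+	//the `&=' below therefore clears the bit again when the loop
+	//raised it only temporarily and nothing stayed compressed.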
+ //assert (ei->i_flags & EXT2_COMPRBLK_FL); + + ei->i_flags &= comprblk_mask; + if ( (ei->i_flags & EXT2_COMPR_FL) + && (!ext2_offset_is_clu_boundary(inode, pos)) ) + { + ei->i_flags |= EXT2_DIRTY_FL; + ei->i_compr_flags |= EXT2_CLEANUP_FL; + } + +# else + if (ei->i_flags & EXT2_COMPR_FL) { + ei->i_flags |= EXT2_DIRTY_FL; + ei->i_compr_flags |= EXT2_CLEANUP_FL; + } +# endif +out: + +#ifdef EXT2_COMPR_REPORT_MUTEX + printk(KERN_DEBUG "EXT2_FILE_WRITE_UNLOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino); +#endif + /* mw: up_read(&inode->i_alloc_sem); // as used by ocsf2 TLL 02/21/07 + was removed with kernel 3.1 */ + inode_dio_done(inode); + mutex_unlock(&inode->i_mutex); + return err; +} + +/* + * Called when an inode is about to be open. + * We use this to disallow opening RW large files on 32bit systems if + * the caller didn't specify O_LARGEFILE. On 64bit systems we force + * on this flag in sys_open. + * Prevent opening compressed file with O_DIRECT. + */ +static int ext2_file_open(struct inode * inode, struct file * filp) +{ + if ((filp->f_flags & O_DIRECT) && (EXT2_I(inode)->i_flags & + (EXT2_COMPR_FL|EXT2_COMPRBLK_FL))) + return -EINVAL; + if (!(filp->f_flags & O_LARGEFILE) && inode->i_size > MAX_NON_LFS) + return -EFBIG; + + return 0; + } +#endif /* CONFIG_EXT2_COMPRESS*/ + /* * We have mostly NULL's here: the current defaults are ok for * the ext2 filesystem. @@ -63,7 +559,12 @@ int ext2_fsync(struct file *file, loff_t const struct file_operations ext2_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, +#ifdef CONFIG_EXT2_COMPRESS + .write = ext2_file_write, +#else .write = do_sync_write, +#endif + .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .unlocked_ioctl = ext2_ioctl, @@ -71,7 +572,11 @@ const struct file_operations ext2_file_o .compat_ioctl = ext2_compat_ioctl, #endif .mmap = generic_file_mmap, +#ifdef CONFIG_EXT2_COMPRESS + .open = ext2_file_open, +#else .open = dquot_file_open, +#endif .release = ext2_release_file, .fsync = ext2_fsync, .splice_read = generic_file_splice_read, --- linux-3.2-rc5/fs/ext2/ioctl.c 2011-12-10 00:09:32.000000000 +0100 +++ linux-3.2-rc5-e2c/fs/ext2/ioctl.c 2011-12-13 14:22:47.855976282 +0100 @@ -7,7 +7,14 @@ * Universite Pierre et Marie Curie (Paris VI) */ +#ifdef CONFIG_EXT2_COMPRESS +#include +#include +#include +#include +#else #include "ext2.h" +#endif #include #include #include @@ -17,6 +24,65 @@ #include +#ifdef CONFIG_EXT2_COMPRESS + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + +#ifdef CONFIG_GZ_HACK +static int check_name(struct inode *ino) +{ + struct dentry *dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); + if (dentry) + if ( + + (dentry->d_name.len >= 4) && + (((dentry->d_name.name[dentry->d_name.len - 2] == 'g') + && (dentry->d_name.name[dentry->d_name.len - 1] == 'z') + && ((dentry->d_name.name[dentry->d_name.len - 3] == '.') + || (dentry->d_name.name[dentry->d_name.len - 4] == '.'))) + + || ((dentry->d_name.name[dentry->d_name.len - 3] == 't') + && (dentry->d_name.name[dentry->d_name.len - 2] == 'g') + && (dentry->d_name.name[dentry->d_name.len - 1] == 'z') + && (dentry->d_name.name[dentry->d_name.len - 4] == '.') + && (dentry->d_name.len >= 5)) + + || ((dentry->d_name.name[dentry->d_name.len - 3] == 'p') + && (dentry->d_name.name[dentry->d_name.len - 2] == 'n') + && (dentry->d_name.name[dentry->d_name.len - 1] == 'g') + && (dentry->d_name.name[dentry->d_name.len - 4] == '.') + && (dentry->d_name.len >= 5)) + + || ((dentry->d_name.name[dentry->d_name.len - 3] == 'j') + && (dentry->d_name.name[dentry->d_name.len - 2] == 'p') + && (dentry->d_name.name[dentry->d_name.len - 1] == 'g') + && (dentry->d_name.name[dentry->d_name.len - 4] == '.') + && (dentry->d_name.len >= 5)) + + || ((dentry->d_name.name[dentry->d_name.len - 3] == 'b') + && (dentry->d_name.name[dentry->d_name.len - 2] == 'z') + && (dentry->d_name.name[dentry->d_name.len - 1] == '2') + && (dentry->d_name.name[dentry->d_name.len - 4] == '.') + && (dentry->d_name.len >= 5)) + + || ((dentry->d_name.name[dentry->d_name.len - 3] == 'm') + && (dentry->d_name.name[dentry->d_name.len - 2] == 'n') + && (dentry->d_name.name[dentry->d_name.len - 1] == 'g') + && (dentry->d_name.name[dentry->d_name.len - 4] == '.') + && (dentry->d_name.len >= 5)) + ) + ) { + return 1; + } + return 0; +} +#endif +#endif + + + long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_dentry->d_inode; @@ -24,6 +90,10 @@ long ext2_ioctl(struct file *filp, unsig unsigned int flags; unsigned short rsv_window_size; int ret; +#ifdef CONFIG_EXT2_COMPRESS + unsigned long datum; + int err; +#endif ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); @@ -75,7 +145,127 @@ long ext2_ioctl(struct file *filp, unsig } flags = flags & EXT2_FL_USER_MODIFIABLE; +#ifdef CONFIG_EXT2_COMPRESS + if (S_ISREG (inode->i_mode) || S_ISDIR (inode->i_mode)) { + + /* pjm 1998-01-14: In previous versions of + e2compr, the kernel forbade raising + EXT2_ECOMPR_FL from userspace. I can't + think of any purpose for forbidding this, + and I find it useful to raise + EXT2_ECOMPR_FL for testing purposes, so + I've removed the forbidding code. */ + if (S_ISREG (inode->i_mode) + && (EXT2_NOCOMPR_FL + & (flags ^ ei->i_flags))) { // mw hint: ^ is a (excluisive OR) + /* NOCOMPR_FL can only be changed if + nobody else has the file opened. */ + /* pjm 1998-02-16: inode->i_count is + useless to us because only dentries + use inodes now. Unfortunately, + there isn't an easy way of finding + the equivalent. We'd have to go + through all dentries using the + inode, and sum their d_count + values. Rather than do that, I'd + rather get rid of the exclusion + constraint. todo. */ + //printk("i_count: %i\n", atomic_read(&inode->i_count)); + //if (atomic_read(&inode->i_count) > 1) + //if (0) + if (ext2_get_dcount(inode) > 1) + { + mutex_unlock(&inode->i_mutex); /*mw*/ + return -ETXTBSY; + } + else { + /* pjm 970429: Discarding + cached pages is not very + clean, but should work. 
*/ + /* pjm 980114: Not quite. We + should also sync any + mappings to buffers first. + This isn't very important, + as none of the current + e2compr programs can + trigger this, but todo. */ + invalidate_remote_inode (inode); + } + } + + if (EXT2_COMPR_FL + & (flags ^ ei->i_flags)) { + if (flags & EXT2_COMPR_FL) { + if (ei->i_flags & EXT2_COMPRBLK_FL) { + /* There shouldn't actually be any + compressed blocks, AFAIK. However, + this is still possible because sometimes + COMPRBLK gets raised just to stop + us changing cluster size at the wrong + time. + + todo: Call a function that just + checks that there are not compressed + clusters, and print a warning if any are + found. */ + } else { + int bits = MIN(EXT2_DEFAULT_LOG2_CLU_NBLOCKS, + (EXT2_LOG2_MAX_CLUSTER_BYTES + - inode->i_sb->s_blocksize_bits)); + + ei->i_log2_clu_nblocks = bits; + ei->i_clu_nblocks = 1 << bits; + } + ei->i_compr_method = EXT2_DEFAULT_COMPR_METHOD; + if (S_ISREG (inode->i_mode)) { + //compress +#ifdef CONFIG_GZ_HACK + /* mw: check for .gz-files and similar + * I think this is the most clever place for + * rejecting files. They remain regular, uncompressed + * files and though can be read bypassing all + * compression stuff (= fast) :-). And it seems to save + * space... somehow */ + if (check_name (inode)) + { + //printk("non-compressable file extension\n"); + mutex_unlock(&inode->i_mutex); + return 0; + } +#endif + //set flags to trigger compression later on + flags |= EXT2_DIRTY_FL; + ei->i_compr_flags |= EXT2_CLEANUP_FL; + } + } else if (S_ISREG (inode->i_mode)) { + if (ei->i_flags & EXT2_COMPRBLK_FL) { + int err; + + if (ext2_get_dcount(inode) > 1){ + mutex_unlock(&inode->i_mutex); //mw + return -ETXTBSY; + } + err = ext2_decompress_inode(inode); + if (err) + { + mutex_unlock(&inode->i_mutex); //mw + return err; + } + } + ei->i_flags &= ~EXT2_DIRTY_FL; + ei->i_compr_flags &= ~EXT2_CLEANUP_FL; + } + } + } +#endif flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; +#ifdef CONFIG_EXT2_COMPRESS + /* bug fix: scrub 'B' flag from uncompressed files TLL 02/28/07 */ + if (!(flags & EXT2_COMPR_FL) && (flags & EXT2_COMPRBLK_FL) ) + { + flags &= ~EXT2_COMPRBLK_FL; + } +#endif ei->i_flags = flags; mutex_unlock(&inode->i_mutex); @@ -148,6 +338,184 @@ setflags_out: mnt_drop_write(filp->f_path.mnt); return 0; } +#ifdef CONFIG_EXT2_COMPRESS + case EXT2_IOC_GETCOMPRMETHOD: /* Result means nothing if COMPR_FL is not set */ + return put_user (ei->i_compr_method, (long *) arg); + case EXT2_IOC_SETCOMPRMETHOD: + if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + if (IS_RDONLY (inode)) + return -EROFS; + if (get_user (datum, (long*) arg)) + return -EFAULT; + if (!S_ISREG (inode->i_mode) && !S_ISDIR (inode->i_mode)) + return -ENOSYS; + /* todo: Allow the below, but set initial value of + i_compr_meth at read_inode() time (using default if + !/) instead of +c time. Same for cluster + size. */ + if ((unsigned) datum >= EXT2_N_METHODS) + return -EINVAL; + if (ei->i_compr_method != datum) { + if ((ei->i_compr_method == EXT2_NEVER_METH) + && (ei->i_flags & EXT2_COMPR_FL)) + return -EPERM; + /* If the previous method was `defer' then + take a look at all uncompressed clusters + and try to compress them. 
(pjm 1997-04-16) */ + if ((ei->i_compr_method == EXT2_DEFER_METH) + && S_ISREG (inode->i_mode)) { + ei->i_flags |= EXT2_DIRTY_FL; + ei->i_compr_flags |= EXT2_CLEANUP_FL; + } + if ((datum == EXT2_NEVER_METH) + && S_ISREG (inode->i_mode)) { + //printk("SETCOMPR\n"); + if ((ei->i_flags & EXT2_COMPRBLK_FL)) + { + /*mw*/ + mutex_lock(&inode->i_mutex); + if (ext2_get_dcount(inode) > 1){ + mutex_unlock(&inode->i_mutex); /*mw*/ + return -ETXTBSY; + } + err = ext2_decompress_inode(inode); + mutex_unlock(&inode->i_mutex); + if ( err < 0) + return err; + } + ei->i_flags &= ~EXT2_DIRTY_FL; + ei->i_compr_flags &= ~EXT2_CLEANUP_FL; + } + ei->i_compr_method = datum; + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } +#ifdef CONFIG_KMOD + if (!ext2_algorithm_table[ext2_method_table[datum].alg].avail) { + char str[32]; + + sprintf(str, "ext2-compr-%s", ext2_algorithm_table[ext2_method_table[datum].alg].name); + request_module(str); + } +#endif + datum = ((datum < EXT2_N_METHODS) + && (ext2_algorithm_table[ext2_method_table[datum].alg].avail)); + return put_user(datum, (long *)arg); + + case EXT2_IOC_GETCLUSTERBIT: + if (get_user (datum, (long*) arg)) + return -EFAULT; + if (!S_ISREG (inode->i_mode)) + return -ENOSYS; + /* We don't do `down(&inode->i_sem)' here because + there's no way for userspace to do the + corresponding up(). Userspace must rely on + EXT2_NOCOMPR_FL if it needs to lock. */ + err = ext2_cluster_is_compressed (inode, datum); + if (err < 0) + return err; + return put_user ((err ? 1 : 0), + (long *) arg); + + case EXT2_IOC_RECOGNIZE_COMPRESSED: + if (get_user (datum, (long*) arg)) + return -EFAULT; + if (!S_ISREG (inode->i_mode)) + return -ENOSYS; + if (IS_RDONLY (inode)) + return -EROFS; + return ext2_recognize_compressed (inode, datum); + + case EXT2_IOC_GETCLUSTERSIZE: + /* Result means nothing if COMPR_FL is not set (until + SETCLUSTERSIZE w/o COMPR_FL is implemented; + todo). */ + if (!S_ISREG (inode->i_mode) + && !S_ISDIR (inode->i_mode)) + return -ENOSYS; + return put_user (ei->i_clu_nblocks, (long *) arg); + + case EXT2_IOC_GETFIRSTCLUSTERSIZE: + /* Result means nothing if COMPR_FL is not set (until + SETCLUSTERSIZE w/o COMPR_FL is implemented; + todo). */ + if (!S_ISREG (inode->i_mode) + && !S_ISDIR (inode->i_mode)) + return -ENOSYS; + return put_user (ext2_first_cluster_nblocks(inode), (long *) arg); + + case EXT2_IOC_SETCLUSTERSIZE: + if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + if (IS_RDONLY (inode)) + return -EROFS; + if (get_user (datum, (long *) arg)) + return -EFAULT; + if (!S_ISREG (inode->i_mode) + && !S_ISDIR (inode->i_mode)) + return -ENOSYS; + + /* These are the only possible cluster sizes. The + cluster size must be a power of two so that + clusters don't straddle address (aka indirect) + blocks. At the moment, the upper limit is constrained + by how much memory is allocated for de/compression. + Also, the gzip algorithms have some optimisations + that assume tht the input is no more than 32KB, + and in compress.c we would need to zero more bits + of head->holemap. (In previous releases, the file + format was limited to 32 blocks and under 64KB.) */ +// #if EXT2_MAX_CLUSTER_BLOCKS > 32 || EXT2_MAX_CLUSTER_NBYTES > 32768 +// # error "This code not updated for cluster size yet." 
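+	/* The switch below just takes log2 of the requested cluster
+	 * size in blocks: e.g. datum == 16 becomes log2_clu_nblocks
+	 * == 4, and i_clu_nblocks is re-derived further down as
+	 * 1 << 4 == 16.  Sizes other than 4, 8, 16 or 32 blocks are
+	 * rejected with -EINVAL. */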
--- linux-3.2-rc5/include/linux/ext2_fs.h	2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/include/linux/ext2_fs.h	2011-12-13 14:22:47.856976313 +0100
@@ -87,6 +87,10 @@ static inline struct ext2_sb_info *EXT2_
 /*
  * Macro-instructions used to manage several block sizes
  */
+#define EXT2_GRAIN_SIZE		1024
+/* Minimum allocation unit.  This is used in fs/ext2/compress.c to
+   check compr_len validity wrt (uncompressed) len.  This definition
+   will probably need to be changed when fragments are implemented. */
 #define EXT2_MIN_BLOCK_SIZE		1024
 #define	EXT2_MAX_BLOCK_SIZE		4096
 #define EXT2_MIN_BLOCK_LOG_SIZE		10
@@ -178,9 +182,10 @@ struct ext2_group_desc
 #define	EXT2_NODUMP_FL			FS_NODUMP_FL	/* do not dump file */
 #define EXT2_NOATIME_FL			FS_NOATIME_FL	/* do not update atime */
 /* Reserved for compression usage... */
-#define EXT2_DIRTY_FL			FS_DIRTY_FL
+#define EXT2_DIRTY_FL			FS_DIRTY_FL	/* Needs compressing; see Readme.e2compr */
 #define EXT2_COMPRBLK_FL		FS_COMPRBLK_FL	/* One or more compressed clusters */
 #define EXT2_NOCOMP_FL			FS_NOCOMP_FL	/* Don't compress */
+#define EXT2_NOCOMPR_FL			FS_NOCOMP_FL	/* Access raw data */
 #define EXT2_ECOMPR_FL			FS_ECOMPR_FL	/* Compression error */
 /* End compression flags --- maybe not all used */
 #define EXT2_BTREE_FL			FS_BTREE_FL	/* btree format dir */
@@ -342,6 +347,7 @@ struct ext2_inode {
 #define EXT2_MOUNT_MINIX_DF		0x000080  /* Mimics the Minix statfs */
 #define EXT2_MOUNT_NOBH			0x000100  /* No buffer_heads */
 #define EXT2_MOUNT_NO_UID32		0x000200  /* Disable 32-bit UIDs */
+#define EXT2_MOUNT_FORCE_COMPAT		0x000400  /* Mount despite incompatibilities */
 #define EXT2_MOUNT_XATTR_USER		0x004000  /* Extended user attributes */
 #define EXT2_MOUNT_POSIX_ACL		0x008000  /* POSIX Access Control Lists */
 #define EXT2_MOUNT_XIP			0x010000  /* Execute in place */
@@ -507,8 +513,14 @@ struct ext2_super_block {
 #define EXT2_FEATURE_INCOMPAT_ANY	0xffffffff
 
 #define EXT2_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
+#ifdef CONFIG_EXT2_COMPRESS
+#define EXT2_FEATURE_INCOMPAT_SUPP	(EXT2_FEATURE_INCOMPAT_COMPRESSION| \
+					 EXT2_FEATURE_INCOMPAT_FILETYPE| \
+					 EXT2_FEATURE_INCOMPAT_META_BG)
+#else
 #define EXT2_FEATURE_INCOMPAT_SUPP	(EXT2_FEATURE_INCOMPAT_FILETYPE| \
 					 EXT2_FEATURE_INCOMPAT_META_BG)
+#endif
 #define EXT2_FEATURE_RO_COMPAT_SUPP	(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT2_FEATURE_RO_COMPAT_BTREE_DIR)
@@ -588,4 +600,16 @@ enum {
 					 ~EXT2_DIR_ROUND)
 #define EXT2_MAX_REC_LEN		((1<<16)-1)
 
+#ifndef __KERNEL__
+/* This simplifies things for user programs (notably e2fsprogs) that
+   must compile whether or not <linux/ext2_fs_c.h> is present, but
+   would prefer to include it.  Presumably the file is present if the
+   user has this version of ext2_fs.h. */
+
+# /* Do not remove this comment. */ include <linux/ext2_fs_c.h>
+
+/* The comment between `#' and `include' prevents mkdep from generating
+   a dependency on ext2_fs_c.h. */
+#endif
+
 #endif	/* _LINUX_EXT2_FS_H */
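
With CONFIG_EXT2_COMPRESS enabled, the kernel now accepts superblocks carrying
the long-reserved INCOMPAT_COMPRESSION feature bit.  Whether an unmounted
image already has the bit set can be checked from userspace; a sketch follows
(illustrative only, real tools should go through libext2fs).  It relies on the
classic ext2 layout: the superblock starts at byte offset 1024, the 32-bit
s_feature_incompat field sits 96 bytes into it, and
EXT2_FEATURE_INCOMPAT_COMPRESSION is 0x0001:

	#include <stdio.h>
	#include <stdint.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(int argc, char *argv[])
	{
		uint32_t incompat;
		int fd = open(argv[1], O_RDONLY);

		if (fd < 0)
			return 1;
		/* s_feature_incompat: 96 bytes into the superblock */
		if (pread(fd, &incompat, sizeof(incompat), 1024 + 96)
		    != sizeof(incompat))
			return 1;
		printf("INCOMPAT_COMPRESSION is %s\n",
		       (incompat & 0x0001) ? "set" : "clear");
		close(fd);
		return 0;
	}
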
--- linux-3.2-rc5/fs/fcntl.c	2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/fcntl.c	2011-12-13 14:22:47.857976344 +0100
@@ -25,6 +25,12 @@
 #include <asm/siginfo.h>
 #include <asm/uaccess.h>
 
+#ifdef CONFIG_EXT2_COMPRESS
+//mw: deny O_DIRECT on files with compression
+#include <linux/ext2_fs_c.h>
+#include "ext2/ext2.h"
+#endif
+
 void set_close_on_exec(unsigned int fd, int flag)
 {
 	struct files_struct *files = current->files;
@@ -171,6 +177,16 @@ static int setfl(int fd, struct file * f
 		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
 		    !filp->f_mapping->a_ops->direct_IO)
 			return -EINVAL;
+
+#ifdef CONFIG_EXT2_COMPRESS
+		//mw: if we have a compressed ext2 file: deny!
+		// TODO: maybe check the fs type first!
+		//assert(!(EXT2_I(inode)->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)));
+		if (EXT2_I(inode)->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL))
+		{
+			return -EINVAL;
+		}
+#endif
 	}
 
 	if (filp->f_op && filp->f_op->check_flags)
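
The setfl() hunk above makes a later fcntl(F_SETFL, O_DIRECT) fail with EINVAL
on a compressed file (compressed clusters only become meaningful through the
page cache, so raw direct I/O would return on-disk compressed data).  A quick
userspace check of the new behaviour (sketch; the path is arbitrary):

	#define _GNU_SOURCE	/* for O_DIRECT */
	#include <stdio.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/mnt/e2c/some-compressed-file", O_RDONLY);

		if (fd < 0)
			return 1;
		if (fcntl(fd, F_SETFL, O_DIRECT) == -1 && errno == EINVAL)
			printf("O_DIRECT denied on compressed file, as expected\n");
		close(fd);
		return 0;
	}

Only the fcntl() path is covered by this hunk; the open(2) path is handled by
the filesystem itself.
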
--- linux-3.2-rc5/mm/truncate.c	2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/mm/truncate.c	2011-12-13 14:22:47.858976376 +0100
@@ -22,6 +22,9 @@
 #include <linux/cleancache.h>
 #include "internal.h"
 
+#ifdef CONFIG_EXT2_COMPRESS
+#include <linux/ext2_fs_c.h>
+#endif
 
 /**
  * do_invalidatepage - invalidate part or all of a page
@@ -551,6 +554,11 @@ void truncate_pagecache(struct inode *in
 	 * unmap_mapping_range call must be made for correctness.
 	 */
 	unmap_mapping_range(mapping, holebegin, 0, 1);
+#ifdef CONFIG_EXT2_COMPRESS
+	if ((inode->i_op && inode->i_op->truncate) &&
+	    ((strcmp(inode->i_sb->s_type->name, "ext2") != 0) ||
+	     (!(EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL))))
+#endif
 	truncate_inode_pages(mapping, newsize);
 	unmap_mapping_range(mapping, holebegin, 0, 1);
 }
--- linux-3.2-rc5/mm/swapfile.c	2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/mm/swapfile.c	2011-12-13 14:22:47.859976408 +0100
@@ -31,6 +31,10 @@
 #include <linux/memcontrol.h>
 #include <linux/poll.h>
 #include <linux/oom.h>
+#ifdef CONFIG_EXT2_COMPRESS
+#include <linux/ext2_fs_c.h>
+#endif
+
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -2056,6 +2060,24 @@ SYSCALL_DEFINE2(swapon, const char __use
 	}
 
 	inode = mapping->host;
+
+#ifdef CONFIG_EXT2_COMPRESS
+	/*
+	 * Swapping is not supported on e2compressed files.
+	 * (Actually, this code is pretty useless because we
+	 * should get an error later anyway because of the
+	 * holes.)  Yes, this is pretty horrible code... I'll
+	 * improve it later.
+	 */
+	if ((strcmp(inode->i_sb->s_type->name, "ext2") == 0)
+	    && (EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL))
+	{
+		printk("e2compr: cannot swapon a compressed file\n");
+		error = -EINVAL;
+		goto bad_swap;
+	}
+#endif
+
 	/* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
 	error = claim_swapfile(p, inode);
 	if (unlikely(error))
--- linux-3.2-rc5/mm/filemap.c	2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/mm/filemap.c	2011-12-13 14:22:47.860976440 +0100
@@ -43,6 +43,10 @@
 #include <asm/mman.h>
 
+#ifdef CONFIG_EXT2_COMPRESS
+# include <linux/ext2_fs_c.h>
+#endif
+
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  * though.
@@ -278,7 +282,19 @@ int filemap_fdatawait_range(struct addre
 			PAGECACHE_TAG_WRITEBACK,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
 		unsigned i;
 
+#ifdef CONFIG_EXT2_COMPRESS
+/*
+ * I'm not sure that this is right.  It has been reworked considerably since
+ * 2.6.5. - whitpa
+ */
+		struct inode *inode = mapping->host;
+		//printk("wait_on_page_writeback_range\n");
+		if ((strcmp(inode->i_sb->s_type->name, "ext2") != 0)
+		    || (atomic_read(&inode->i_mutex.count) > 0)
+		    || (EXT2_I(inode)->i_compr_flags & EXT2_OSYNC_INODE))
+#endif
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
@@ -1184,6 +1200,15 @@ page_ok:
 		}
 		nr = nr - offset;
 
+#ifdef CONFIG_EXT2_COMPRESS
+		lock_page(page);
+		//check again: is the page still uptodate after locking?
+		if (!PageUptodate(page)) {
+			unlock_page(page);
+			goto page_not_up_to_date;
+		}
+#endif
+
 		/* If users can be writing to this page using arbitrary
 		 * virtual addresses, take care about potential aliasing
 		 * before reading the page on the kernel side.
@@ -1215,6 +1240,10 @@ page_ok:
 		offset &= ~PAGE_CACHE_MASK;
 		prev_offset = offset;
 
+#ifdef CONFIG_EXT2_COMPRESS
+		unlock_page(page);
+#endif
+
 		page_cache_release(page);
 		if (ret == nr && desc->count)
 			continue;
@@ -1224,7 +1253,12 @@ page_not_up_to_date:
 		/* Get exclusive access to the page ... */
 		error = lock_page_killable(page);
 		if (unlikely(error))
+		{
+			printk("Readpage Error: mw: page locking failed with code: %i\n", error);
+			printk("Readpage Error: mw: might happen as page was locked 'killable'\n");
+			printk("Readpage Error: mw: was the reading application killed?\n");
 			goto readpage_error;
+		}
 
 page_not_up_to_date_locked:
 		/* Did it get truncated before we got the lock? */
@@ -1255,13 +1289,17 @@ readpage:
 			page_cache_release(page);
 			goto find_page;
 		}
+		printk("Readpage Error: fs-specific readpage failed with code: %i\n", error);
 		goto readpage_error;
 	}
 
 	if (!PageUptodate(page)) {
 		error = lock_page_killable(page);
 		if (unlikely(error))
+		{
+			printk("Readpage Error: page not uptodate after read; page locking failed with code: %i\n", error);
 			goto readpage_error;
+		}
 		if (!PageUptodate(page)) {
 			if (page->mapping == NULL) {
 				/*
@@ -1274,6 +1312,7 @@ readpage:
 			unlock_page(page);
 			shrink_readahead_size_eio(filp, ra);
 			error = -EIO;
+			printk("Readpage Error: page not uptodate after read AND page locked; failed with code: %i\n", error);
 			goto readpage_error;
 		}
 		unlock_page(page);
@@ -1285,6 +1324,7 @@ readpage_error:
 	/* UHHUH! A synchronous read error occurred. Report it */
 	desc->error = error;
 	page_cache_release(page);
+	printk("Readpage Error\n");
 	goto out;
 
 no_cached_page:
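
The do_generic_file_read() changes above all follow one pattern: the unlocked
PageUptodate() test that the stock kernel relies on is no longer sufficient,
because a concurrent ext2_compress_cluster()/ext2_decompress_cluster() can
invalidate the page in between, so the test is repeated under the page lock
before any data is copied out.  A self-contained userspace model of that
double-check (illustrative only; the kernel uses the page lock, not a mutex,
and the names here are invented):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
	static int page_uptodate = 1;	/* models PageUptodate() */

	static int read_page(void)
	{
		if (!page_uptodate)	/* cheap, unlocked hint */
			return -1;	/* slow path: (re)read the page */
		pthread_mutex_lock(&page_lock);
		if (!page_uptodate) {	/* re-check under the lock */
			pthread_mutex_unlock(&page_lock);
			return -1;	/* lost a race with de/compression */
		}
		/* safe to copy data out while the lock is held */
		pthread_mutex_unlock(&page_lock);
		return 0;
	}

	int main(void)
	{
		printf("read_page() -> %d\n", read_page());
		return 0;
	}
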
--- linux-3.2-rc5/mm/page_alloc.c	2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/mm/page_alloc.c	2011-12-13 14:22:47.863976534 +0100
@@ -1733,6 +1733,8 @@ this_zone_full:
 	}
 	return page;
 }
+/*mw: needed to build ext2 with e2compr as a module */
+EXPORT_SYMBOL(__pagevec_free);
 
 /*
  * Large machines with many possible nodes should not always dump per-node