kernel.patches/3.2.34/linux-3.2-e2c-0.4.58.patch

--- linux-3.2-rc5/fs/ext2/ChangeLog.e2compr-26port 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/ChangeLog.e2compr-26port 2011-12-13 14:22:47.822975235 +0100
@@ -0,0 +1,439 @@
+
+e2compr - Released under the GPL V 2 license.
+
+
+Installation:
+=============
+
+1. gunzip:
+ > gunzip linux-3.1-rc3-e2c-0.4.58.patch.gz
+
+2. change to your kernel directory
+
+3. make clean:
+ > make clean
+
+4. patch:
+ > patch -p1 < ../patch/to/patch/linux-3.1-rc3-e2c-0.4.58.patch
+
+ check whether any rejects occurred:
+ > find | grep .rej
+
+ WARNING: All rejects must be fixed manually!
+
+5. config:
+ > make oldconfig
+ > make menuconfig
+ Now enable at least the ext2-compression feature:
+ Filesystems:
+ <*> Second extended fs support
+ [ ] Ext2 extended attributes
+ [ ] Ext2 execute in place support
+ [*] Ext2 file compression (DANGEROUS)
+ Ext2 file compression options --->
+
+6. make:
+ > make
+
+
+Building a patch:
+=================
+
+files.txt:
+
+fs/ext2/ChangeLog.e2compr-26port
+Documentation/filesystems/e2compress.txt
+fs/ext2/Readme.e2compr
+fs/Kconfig
+include/linux/ext2_fs_c.h
+fs/ext2/Makefile
+fs/ext2/compress.c
+fs/ext2/e2zlib.c
+fs/ext2/adler32.c
+fs/ext2/super.c
+fs/ext2/ialloc.c
+fs/ext2/balloc.c
+fs/ext2/inode.c
+fs/ext2/file.c
+fs/ext2/ioctl.c
+fs/ext2/ext2.h
+include/linux/ext2_fs.h
+fs/fcntl.c
+mm/truncate.c
+mm/swapfile.c
+mm/filemap.c
+mm/page_alloc.c
+
+
+cat files.txt | xargs -n1 -I '{}' diff -pruNbB linux-3.1-rc3/'{}' linux-3.1-rc3-e2c/'{}' > ./linux-3.1-e2c-0.4.58.patch
+
+
+Changelog:
+==========
+
+25 August 2011
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * released version 0.4.58 for kernel 3.1
+ * file.c: i_alloc_sem was removed. I am not sure if only holding i_mutex
+ will be enough. See http://patchwork.ozlabs.org/patch/101859/.
+ In ext2_file_write() I replaced:
+
+ mutex_lock(&inode->i_mutex);
+ - down_read(&inode->i_alloc_sem);
+ + atomic_inc(&inode->i_dio_count);
+
+ - up_read(&inode->i_alloc_sem);
+ + inode_dio_done(inode);
+ mutex_unlock(&inode->i_mutex);
+
+ The main purpose of i_dio_count is to block vmtruncate_range()
+ as long as i_dio_count is greater than 0. In other words,
+ all direct I/O must be completed before truncating is allowed.
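+
+ For reference, a hedged sketch of the consumer side of this
+ protocol (kernel 3.x names; not code from this patch):
+
+     /* truncate-like paths drain direct I/O first */
+     inode_dio_wait(inode);  /* sleeps while i_dio_count > 0 */
+     truncate_setsize(inode, newsize);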
+
+ * file.c: generic_osync_inode was removed from mm - added functionality to
+ file.c as ex_generic_osync_inode()
+ * file.c: changed: &inode_lock to &inode->i_lock
+ * ext2_warning() replaced by ext2_msg()
+ * compress.c: vfs_dq_init(inode) replaced by dquot_initialize(inode)
+ * compress.c: ext2_truncate(inode) replaced by
+ ext2_truncate_blocks(inode, inode->i_size), which looks like
+ exactly the same thing!
+ * inode.c: dentry->d_lock now seems to need
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED) held.
+ * compress.c, inode.c: added might_schedule() before wait_on_buffer()
+ statements to ensure we are not atomic at this point.
+ * truncate.c: removed the patch from memory.c and moved it to truncate.c,
+ as the surrounding kernel code also moved there. vmtruncate() was
+ split into truncate_setsize() and truncate_pagecache() with kernel 3.1.
+
+
+10 August 2009
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * released version 0.4.58
+ * merged assert.h and debug.h into ext2_fs_c.h
+ * merged NDEBUG into EXT2_COMPR_DEBUG
+ * disabled adler checksums on "read" if EXT2_COMPR_DEBUG is not defined.
+ * merged none.c into compress.c
+ * inserted multiple "CONFIG_EXT2_COMPRESS" defines to allow disabling
+ of ext2 compression in patched sources.
+ * re-inserted EXPORT_SYMBOL(__pagevec_free) to support ext2 as module
+
+05 August 2009
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * released version 0.4.57
+ * ported to kernel 2.6.30:
+ inode.c: after a fix in generic ext2's ext2_get_blocks(), the bforget had to be removed.
+ * integrated SMP from version 0.4.56
+ * per CPU one separate read and one separate write working area
+ * removed all external compression codecs
+ * removed "verify compression" (never helped to find a bug anyway)
+ * Lindent'ed all source and header files
+
+01 August 2008
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * released version 0.4.55
+ * complete code cleanup
+ * changed policy to ALWAYS_LOCKING pages in do_generic_mapping_read()
+ => completely removed PG_Compr-Flag now!
+
+31 July 2008
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * released version 0.4.54
+ * fixes rare himem bug: only occurs if page > cluster in inode.c/readpage()
+ * fixes rare readpage bug in mm/filemap.c/do_generic_mapping_read():
+ PG_Compr flags disallow reading a page while de/compressing.
+ Setting and unsetting it requires the page lock, with one exception:
+ do_generic_mapping_read() in filemap.c. This is done for performance
+ reasons. However, simultaneous calls of do_generic_mapping_read() for the SAME
+ page might break the PG_Compr scheme.
+
+ Solutions: always lock every page before reading, OR have the second (n-th)
+ call of do_generic_mapping_read() busy-wait until the first is done.
+ The default is busy-waiting now; ALWAYS_LOCK is implemented as an option via a define.
+
+25 June 2008
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * released version 0.4.53
+ * fixes himem bug: unmapped block in ext2_decompress_cluster()
+ * fixes bdev bug: ext2_get_block() must be called for every block
+ whose bdev == NULL, as such blocks caused oopses. ext2_get_block() will
+ set the correct bdev and the correct block number of the block.
+
+ NEVER assign bdev manually, because the block number might be random then:
+ "block->b_bdev = something" (DON'T!)
+
+ ALWAYS use:
+ if (!buffer_mapped(block) || (block->b_bdev == NULL))
+ ext2_get_block()
+
+ The bdev bug is closely related to file holes (empty blocks in a file).
+ If compressed data is to be written to a former hole, then
+ usually ext2_get_block() must be called with create set:
+ ext2_get_block( , , , 1 /*create*/).
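+
+ A minimal C sketch of the safe pattern (assuming the usual
+ buffer_head API of this kernel series; error handling omitted):
+
+     struct buffer_head *bh;  /* one block of the cluster */
+
+     if (!buffer_mapped(bh) || bh->b_bdev == NULL) {
+         /* let ext2_get_block() fill in b_bdev and b_blocknr;
+            pass create=1 only when writing into a former hole */
+         ext2_get_block(inode, iblock, bh, create);
+     }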
+
+ * fixed missing include in xattr.h
+ * EXT2_COMPRBLK might be removed during compression if a cluster
+ doesn't compress. During compression we re-raise EXT2_COMPRBLK
+ flag after every cluster now.
+ * added missing export of __pagevec_free to (mm/page_alloc.c)
+ * deny O_DIRECT access mode after open of a file using fcntl()
+ (in fs/fcntl.c).
+ * file.c:
+ Rewrote ext2_file_write() to use the kernel's generic
+ do_sync_write(). Writing to compressed files calls
+ ext2_file_write(), which will:
+ - divide the write range into clusters
+ - ext2_decompress_cluster (if needed)
+ - do_sync_write()
+ - ext2_compress_cluster (if needed)
+ * inode.c:
+ ext2_writepage()/ext2_writepages() usually write back
+ dirty pages of an inode. They reside in the kernel's page cache.
+ These pages might e.g. be written/dirtied by a mmap()-ped file.
+ generic_file_aio_write() also ends up using ext2_writepage().
+ I don't see how ext2_writepage() would handle compressed
+ files, so I re-inserted and re-wrote this part of the old 2.4 code.
+ I don't know if this code (USE_WRITEPAGE) is needed at all,
+ so I leave it disabled by default. Enabled, it might
+ leave compressed files with a compression ratio of 100%.
+ Don't use it yet!
+
+17 April 2008
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * first patch for kernel 2.6.25 released
+
+20 March 2008
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * version 0.4.52: EXT2_COMPRESS_WHEN_CLU didn't work. This
+ feature enables compression during file write.
+
+15 Oct 2007
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * First official Sourceforge release as version 0.4.51
+ * TODO: figure out what is necessary to enable swap
+ support for e2compr again (see mm/swapfile.c).
+
+27 Sep 2007
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * The system stalled with a lot of I/O during decompression of
+ USB sticks, too. I replaced mark_buffer_dirty
+ with set_buffer_dirty. This ensures that ONLY the buffers
+ and not the pages are marked. Then I write back the
+ buffers with ll_rw_block() at the end of
+ ext2_decompress_cluster() and ext2_decompress_pages().
+ This should stop flooding the system with dirty pages,
+ because now every routine waits for its newly dirtied buffers.
+ My system with 128MB of RAM is responding much better during
+ compression/decompression now. Decompression also seems
+ to be a bit faster.
+ (this change is active with: #ifndef E2C_GENERIC_OSYNC)
+
+25 Sep 2007
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * The system stalled with a lot of I/O during compression of
+ USB sticks. It seems generic_osync_inode() should not be
+ called in ext2_compress_cluster. Therefore I replaced
+ it with ll_rw_block() to write the modified blocks
+ directly back to disk. This also gave ~100% better
+ performance for compression.
+
+9 Sep 2007
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+ * fixed bdev bug. This bug appeared primarily when
+ files contained holes. A dirty page with holes
+ caused ext2_get_cluster_blocks [ext2_get_block()]
+ to create ALL blocks of the page, even if there were holes!
+ These allocated hole-blocks weren't set to 0 anywhere and
+ therefore contained invalid data. I changed the
+ code to never allocate these holes.
+
+ * ext2_truncate() added again to ext2_compress_cluster for
+ uncompressed clusters. Fixes filesize errors reported by
+ "e2fsck -f /dev/..."
+
+24 Aug 2007
+ Matthias Winkler <matthiaswinkler@users.sourceforge.net>
+
+ Major changes:
+ * completely ported to inode->i_mutex
+
+ * clever CONFIG_GZ_HACK to reject "uncompressable" files
+ (according to their extension) early. The IOCTL in ioctl.c
+ which sets the compression on the file already rejects such
+ extensions now.
+
+ * a new create_empty_buffers_e2c() was necessary, because the
+ "extra" pages should NOT have a valid i_mapping! Further, the
+ buffers needed to be initialized correctly.
+
+ * proper block initialization (bdev bug) in:
+ - create_empty_buffers_e2c()
+ - ext2_get_cluster_blocks
+
+ * in file.c copied:
+ ...with one single change at ext2_mapping_read in label page_ok:
+ a new page flag (page-flags.h), the so-called "PG_compr" flag, is
+ checked to ensure the corresponding page is not under
+ compression/decompression. This was necessary because
+ generic_mapping_read() doesn't lock() the page in ALL cases!!!
+ Otherwise generic_mapping_read() would have to lock EVERY page
+ in the whole system before returning it....
+
+ * Fixed HiMem support: balanced ALL kmap/kunmap calls. Unbalanced
+ calls cause the system to hang at "kmap_himem()" after some
+ time. This can be seen with magic SysRq "altgr + prtscr + W".
+
+ * ext2_decompress_cluster() didn't mark uptodate pages for writeback.
+ I don't know how this method could EVER have worked...
+
+ * ext2_compress_cluster() caused an ever-increasing amount of dirty pages
+ (cat /proc/vmstat) which couldn't be written back by sync/umount.
+ I think this was due to the ClearPageDirty at the end of ext2_compress_cluster().
+
+ * introduced ext2_get_dcount() to safely determine if a file is really "open"
+ and to abort compression/decompression in such a case.
+
+ * Removed gzip and the non-working assembler code completely. Replaced by the
+ kernel's built-in zlib, which is pretty much the same code...
+
+ * New kernel configuration interface
+
+ * Rollback of some unnecessary "fixes"...
+
+ TODO:
+
+ * HiMem support:
+ One might try to use kmap_atomic instead of kmap in ext2_readpage. kmap_atomic
+ doesn't block and might speed up the regular page reading. Might.
+
+20 April 2007
+ Andreas:
+
+ * Replaced gzip with the kernel's built-in zlib, because the assembly
+ versions of the existing compression modules crashed.
+
+ * Initial HiMem support.
+
+
+06 Mar 2007
+
+ Terry Loveall <loveall@iinet.com>
+
+ * adapted linux-2.6.10-e2compr-0.4.45-alpha0126.diff to 2.6.18.5 kernel
+
+ * replaced most instances of down/up(inode->i_sem) with
+ lock/unlock(inode->i_mutex). For exception see file.c, below.
+
+ * made various printk regularizations to uniquely identify each printk
+ instance. Inserted missing KERN_DEBUG and KERN_WARNING.
+
+ * compress.c:
+ bug fix: ext2_count_blocks: init head_bh for each iteration.
+ bug fix: ext2_count_blocks: add set clen=ulen for uncompressable clusters.
+ bug fix: ext2_compress_cluster: replacement and inlining of an
+ invalidate_inode_buffers function to keep root filesystem changes
+ uptodate on disk (avoids having to umount the root file system to update).
+ warning fix: ext2_compress_cluster: various variables initialized.
+ ext2_compress_cluster: removed #ifdef NDEBUG
+ bug fix: ext2_compress_cluster: defined maxclus, calculate and set for:
+ bug fix: ext2_compress_cluster: set filesize for uncompressed clusters.
+ ext2_cleanup_compressed_inode: changed error message to indicate the 'Z'
+ flag was caused by trying to un/compress an already open file.
+ bug fix: cp to compr dir: Truncate uncompressed files to their
+ uncompressed length, i.e. force kernel to update inode and sb
+
+ * file.c:
+ removed file->f_error code since f_error is no longer in the file struct.
+ ext2_file_write: changed down/up i_sem to down_read/up_read i_alloc_sem
+
+ * inode.c:
+ bug fix: ext2_get_block: restored changed: loop to bforget
+
+ * ioctl.c:
+ ext2_ioctl: scrubbed 'B' flag on file uncompress.
+
+ * match[56]86.S:
+ made code dependent on #ifdef CONFIG_REGPARM to compile with either
+ register variable or stack variable parameter passing.
+
+28 Feb 2005
+
+ Yabo Ding <bobfree_cn@yahoo.com.cn>,<yding@wyse.com>
+
+ * Corrected page unlocking in inode.c.
+
+19 Feb 2005
+
+ Paul Whittaker <whitpa@users.sourceforge.net>
+
+ * Added le32_to_cpu corrections in critical areas of compress.c
+ * Optimized function exit code in inode.c.
+
+24 Aug 2004
+Yabo Ding <bobfree_cn@yahoo.com.cn>,<yding@wyse.com>
+
+ compress.c
+* ext2_decompress_pages()
+ The old code cannot reread data from disk into a changed buffer's data pointer in 2.6.x.
+ So, I copy the (decompressed) memory data to a temporary buffer;
+ then reread the (compressed) data from disk and copy it to the head;
+ then copy the memory data back from the temporary buffer.
+ It seems clumsy, but it works well.
+* ext2_compress_cluster()
+ Force write to disk.
+
+ inode.c
+* ext2_writepage()
+ Deleted the old code; everything now directly calls the block_write_full_page() function.
+
+* ../Kconfig
+ Changed the e2compr config into a submenu config
+
+04 Aug 2004
+
+Paul Whittaker <whitpa@users.sourceforge.net>
+
+* compress.c: replaced mark_buffer_dirty(x,y) with mark_buffer_dirty(x). I'm
+ still not at all sure that this is sufficient.
+
+03 Aug 2004
+
+Paul Whittaker <whitpa@users.sourceforge.net>
+
+* ../../include/linux/ext2_fs_c.h: added missing prototypes for ext2_iLZRW3A(),
+ ext2_iLZRW3A(), ext2_rLZRW3A().
+
+02 Aug 2004
+
+Paul Whittaker <whitpa@users.sourceforge.net>
+
+* ../../mm/page_alloc.c: added EXPORT_SYMBOL(__pagevec_free).
+
+* ../../include/linux/pagemap.h, ../../mm/filemap.c: removed inline from
+ __grab_cache_page() declarations, added EXPORT_SYMBOL(__grab_cache_page).
+
+* ../../include/linux/mm.h, ../../mm/filemap.c: removed inline from
+ page_waitqueue() declarations, added EXPORT_SYMBOL(page_waitqueue).
+
+* bzip2/{lib_bzip_d,lib_bzip_e}.c, {gzip,lzo,lzrw3a,lzv1}/e2compr*.c:
+ replaced MOD_INC_USE_COUNT and MOD_DEC_USE_COUNT with try_module_get()
+ and module_put() to avoid deprecation and safety warnings.
+
+* lzrw3a/lzrw3a.c: added (UBYTE *) casts to avoid compiler warnings.
+
+* compress.c, inode.c: incorporated Yabo's changes, correcting mistakes in
+ ext2_readpages() in inode.c.
+
+* removed printks for ext2_discard_prealloc from file.c and inode.c (not
+ needed now that this problem has been resolved).
+
+2.6.5 -> 2.6.7 updates:
+
+* ../../mm/filemap.c: rewrote CONFIG_EXT2_COMPRESS hunk for 2.6.7.
+
+* compress.c, file.c: use mapping_mapped(), since mapping->i_mmap has changed
+ and mapping->i_mmap_shared no longer exists.
+
+* inode.c: page->count becomes page->_count.
--- linux-3.2-rc5/Documentation/filesystems/e2compress.txt 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/Documentation/filesystems/e2compress.txt 2011-12-13 14:22:47.824975303 +0100
@@ -0,0 +1,116 @@
+Transparent compression for ext2 filesystem
+===========================================
+
+What this document is.
+----------------------
+This document is intended to explain how e2compress has been implemented/ported
+to kernel 2.4. It also gives the status of the current work. You need to have e2compress
+knowledge (i.e. to know how e2compress works, from a general point of view).
+
+What this document is not.
+--------------------------
+This document is not a full explanation of how e2compress works. For that,
+there are other documents, such as the fs/ext2/Readme.e2compr file for the technical
+point of view; the user manual can be found at <http://e2compr.sourceforge.net/>.
+This site is also a place where you will find much information about e2compress
+development for kernel 2.4, tools, manuals and so on.
+
+
+Introduction
+============
+
+This is a first adaptation of e2compress for kernel 2.4. The work has been done
+by Alcatel (Alcatel Business Systems - R&D) at Illkirch. It was started
+from the latest patch provided by Peter Moulder for kernel 2.2,
+i.e. e2compr-0.4.39-patch-2.2.18.
+It is fully compatible with the previous version.
+Hereafter you will first find some explanations of the choices made during
+development, and then the status of the current work from a functional point of
+view.
+
+
+Development
+===========
+
+As in previous patches, the most interesting things happen when reading in ext2_readpage
+and when writing in ext2_writepage and ext2_file_write.
+In the 2.2 kernel, compression occurs on clusters of blocks. So when reading
+or writing a part of a file, we first have to compute the cluster on which the I/O
+occurs, then we have to get all buffers of the cluster and uncompress the data if
+needed; then reading/writing happens "as for normal files".
+In 2.4 kernels, I/O occurs through the page cache: i.e. when reading/writing a
+part of the file, first the corresponding page is fetched, then we get the needed
+buffers, which point into the page. This means that to keep the same scheme as in 2.2,
+we have to use the notion of a cluster of pages. To get all buffers of a cluster,
+we first get all pages of the cluster, then the buffers of each page...
+
+So, things happen as follows:
+
+ext2_readpage
+-------------
+If the data corresponding to the page is in a compressed cluster, this function performs
+more work: instead of reading one page, it reads the whole "cluster of pages".
+In fact, we have to read the whole compressed buffer anyway. Once we have got all buffers
+of the cluster, uncompressed (at least a part of) the data, and located the part of
+the uncompressed data which corresponds to the requested page, it is not much more
+work to also read (i.e. do some memcpy for) the other pages belonging to this
+cluster.
+So, the first read of the first page of the cluster takes somewhat longer, but then
+every page of the cluster is uptodate in the cache.
+
+ext2_writepage
+--------------
+An overhead has been added for pages belonging to a compressed cluster.
+In fact, if the cluster is still compressed on the disk, we can't directly write the
+page (which contains uncompressed data) into the middle of a compressed cluster.
+So, we first have to uncompress the whole cluster on the disk; then we can write the
+new data of the dirty page(s).
+
+ext2_file_write
+---------------
+This replaces `generic_file_write' when the e2compress option is activated.
+It is a copy of `generic_file_write'. The main difference is that instead of looping
+page by page as in `generic_file_write', we loop over clusters of pages.
+In each loop (see the sketch below):
+ * we compute the cluster to which the beginning of the data (to be written) belongs.
+ * then, we get all pages of the cluster.
+ * If the cluster is a compressed one, we read all pages and uncompress them.
+ Otherwise, we perform a `prepare_write' (as in generic_file_write).
+ * We copy the data onto each page from user space,
+ * Call `commit_write' on the dirty pages.
+ * When reaching the end of a cluster, we compress it. (As in 2.2.)
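+
+A rough sketch of that loop (an illustration only; the helper names
+here are hypothetical, not the actual functions of the patch):
+
+    while (count > 0) {
+        clu = cluster_of(pos);              /* cluster holding pos */
+        get_cluster_pages(inode, clu);      /* get all of its pages */
+        if (cluster_is_compressed(inode, clu))
+            read_and_uncompress_cluster(inode, clu);
+        else
+            prepare_write(...);
+        copied = copy_from_user_to_pages(...);
+        commit_write(...);                  /* on the dirty pages */
+        if (end_of_cluster_reached(pos + copied))
+            compress_cluster(inode, clu);
+        pos += copied;
+        count -= copied;
+    }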
+
+Note: Another implementation could have been to keep generic_file_write and add an overhead
+to `ext2_prepare_write' and `ext2_commit_write'; on the first access to a page of a compressed
+cluster, the whole cluster would be uncompressed (i.e. all pages of the cluster would be read and
+uncompressed in `ext2_prepare_write'), and when committing the last page of the cluster,
+compression would occur...
+
+ext2_open_file
+--------------
+In the 2.4.16 kernel, this function has been added to treat the case of files opened for
+"direct IO". Direct IO is not supported on compressed files, so opening a file this way
+is forbidden.
+
+Other places in ext2
+--------------------
+Other changes occur as in 2.2 for managing the compression flags of files and the specific
+`COMPRESSED_BLK_ADDR' address for compressed blocks.
+So please refer to the existing documentation for 2.2 on this topic.
+
+Status
+======
+Today (middle of December 2001), e2compress on kernel 2.4.16 has been tested on the i386
+architecture and has been used with success by tens of people in the department for some weeks.
+It is fully functional on ix86 and fully compatible with the 2.2 version of e2compress.
+It should work on other architectures, but has NOT been tested on them.
+Please note the following:
+ * No performance tests have been done.
+ * I don't claim that the code is optimized (it is probably not, but I hope that
+ "gurus" will not find it too bad).
+So, I think I can say that there is no known "big" or "blocking" bug.
+
+Some strange things have been observed in very limit cases, i.e. when memory is overloaded.
+
+
+As usual, this e2compress comes without warranty, use it at your own risk, etc...
--- linux-3.2-rc5/fs/ext2/Readme.e2compr 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/Readme.e2compr 2011-12-13 14:22:47.825975345 +0100
@@ -0,0 +1,511 @@
+
+ 0. Introduction
+ ~~~~~~~~~~~~~~~
+
+This file gives some technical information on e2compr and how it's
+implemented.
+
+More general information on e2compr can be found at
+http://e2compr.sourceforge.net/.
+
+The first couple of sections of this document are written for those
+who have no interest in the source code but just want to know enough
+to be able to predict and understand e2compr behaviour and its
+implications.
+
+Section 3 describes the e2compr-specific ext2 attributes for a file
+(i.e. chattr things).
+
+Section 4 describes the e2compr ioctls from the point of view of a
+user-mode C programmer.
+
+Section 5 gives more detail about the file format on disk.
+
+Section 6 gives details on what's written where, i.e. a map of e2compr
+code in the kernel.
+
+
+Authorship: section 2 is written mainly by Antoine; the remainder is
+written by Peter.
+
+Questions should be sent to the e2compr mailing list,
+e2compr-misc@lists.sourceforge.net, or to the current maintainers,
+bothie@users.sourceforge.net and whitpa@users.sourceforge.net.
+
+
+ 1. The idea
+ ~~~~~~~~~~~
+
+See section `E2compr implementation' in the main e2compr texinfo
+documentation for an introduction to how e2compr works. (Type
+`info "(e2compr)Implementation"' at the shell prompt.) It was
+originally written as part of the file you're now reading.
+
+
+ 2. More details
+ ~~~~~~~~~~~~~~~
+
+Every compressed file stores its cluster size in the inode structure
+(in the ext2 attribute flags field).
+This (the cluster size) is the most important piece of information:
+knowing the cluster size, we can convert a block number into a cluster
+number, get the cluster the block belongs to, and then get the block.
+The inode's flags field also keeps the algorithm that is used to compress data
+written to the file.
+
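+(A sketch of that conversion, assuming for simplicity that all
+clusters are the same size: with log2_clu_nblocks = log2(blocks per
+cluster), the cluster containing block blk is just
+blk >> log2_clu_nblocks. The real code must also allow for the
+shorter first cluster; see ext2_first_cluster_nblocks().)
+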
+(The algorithm that was used to compress a given
+cluster is stored in the cluster head near the beginning of the
+compressed data. This may differ from the current algorithm
+identified in the inode, which is only used to determine which
+algorithm to use at the time clusters are written.)
+
+The algorithm id and the cluster size are stored in the i_flags field
+(thus reducing the number of possible flags). We also create some new
+flags: the COMPRBLK flags tells if there is at least one compressed
+cluster in the file, the ECOMPR flag indicates that an error (related
+to compression) occurred while reading from or writing to this file.
+If it is set, the file becomes read-only. (In previous releases, you
+were denied even read access to the file unless you set the NOCOMPR
+flag. There might be some benefit in returning to the old behaviour
+if decompressing erroneous data can cause an OOPS, but I think it
+would be better to correct the decompressors. Others may disagree,
+pointing out that it costs CPU time to check for incorrect data.)
+
+Besides the information stored in the inode, each cluster holds some
+data. Here is the cluster_head structure for e2compr-0.4:
+
+struct ext2_cluster_head {
+ __u16 magic; /* == EXT2_COMPRESS_MAGIC_04X. */
+ __u8 method; /* compression method id. */
+ __u8 holemap_nbytes; /* length of holemap[] array */
+ __u32 checksum; /* adler32 checksum. Checksum covers all fields
+ below this one, and the compressed data. */
+ __u32 ulen; /* size of uncompressed data */
+ __u32 clen; /* size of compressed data (excluding cluster head) */
+ __u8 holemap[0]; /* bitmap describing where to put holes. */
+};
+
+The `magic' field is a magic number. It is used to detect filesystem
+corruption, and can also be used for data recovery purposes. (The
+e2compress program for e2compr-0.3 does this.)
+
+The `checksum' field contains an Adler-32 checksum on the fields below
+it in the struct and the compressed data. Its purpose is to protect
+us from buffer overruns caused by corrupted data.
+
+The `ulen' field says how many bytes are stored in the cluster, when
+uncompressed.
+
+The `clen' field says how many bytes are held in the cluster, when
+compressed.
+
+The `method'
+field identifies the algorithm that was used to compress the cluster
+(this id will be used to uncompress the cluster, not the one stored
+into the inode that will be used only to compress a new cluster).
+
+The variable-length `holemap' array says where to put hole blocks when
+decompressing data. The `holemap_nbytes' field gives the length of
+this array. Iff holemap_nbytes is zero then there are no holes (other
+than at the end of the cluster, as determined by ulen versus cluster
+size).
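+
+For example (an illustration, not data from the patch): in an
+8-block cluster where blocks 0 and 3 are holes, a single holemap
+byte suffices (holemap_nbytes == 1); on decompression, blocks 0 and
+3 are recreated as holes and the six remaining blocks receive the
+decompressed data.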
+
+The compressed data immediately follows the holemap array (with no
+padding before it).
+
+
+Compressing a cluster is done in the following way: We first get every
+block in the cluster and compute the bitmap. We then compress the
+non-hole data, and store back the compressed data into the existing
+blocks. Unused blocks are then freed.
+
+Decompressing a cluster is done in the following way: We get the
+cluster head and retrieve the bitmap. Missing blocks are allocated and
+put where the bitmap says, and then compressed data is decompressed and
+stored back into the blocks.
+
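+A condensed sketch of those two paths (pseudocode, not the actual
+code of the patch):
+
+    compress:   read all blocks of the cluster
+                -> build the holemap from the hole pattern
+                -> compress the non-hole data
+                -> store head + holemap + compressed data back into
+                   the first non-hole blocks
+                -> free the blocks that are no longer needed
+
+    decompress: read the cluster head and retrieve the holemap
+                -> allocate blocks where the holemap says data goes
+                -> decompress into them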
+
+Reading from a compressed cluster is really easy: get the blocks,
+decompress them into a working area, and get the bytes we want from
+the working area. Writing to a compressed cluster is done by first
+decompressing the cluster, and then writing to it, as if it were a
+normal file. The file is then marked so that the cluster will be
+recompressed later. [pjm: Do we decompress the cluster even if it's
+to be entirely written over?]
+
+In the current version, compression really occurs only when the inode
+is put (which in turn only occurs when no processes have the file
+open). This may change.
+
+
+ 3. Ext2 file attributes
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+Attribute Lsattr Meaning
+~~~~~~~~~ ~~~~~~ ~~~~~~~
+EXT2_SECRM_FL s Secure deletion (not yet implemented)
+EXT2_UNRM_FL u Undelete-able. (Not yet implemented.)
+EXT2_COMPR_FL c Future writes to this file should be compressed.
+ (Clearing this flag decompresses the file if it
+ is a regular file and there is space to do so;
+ see the e2compr FAQ for details.)
+EXT2_SYNC_FL S Synchronous updates. (As far as I know, this is
+ not yet fully implemented.)
+EXT2_IMMUTABLE_FL i Immutable file.
+EXT2_APPEND_FL a Writes to file may only append.
+EXT2_NODUMP_FL d Not a candidate for backup with dump(8).
+EXT2_NOATIME_FL A No access time updates.
+EXT2_DIRTY_FL Z De/compression is yet to happen. Read the
+ source for exact meaning.
+EXT2_COMPRBLK_FL B File contains one or more compressed clusters.
+EXT2_NOCOMPR_FL X Access raw compressed data. This isn't really
+ supported at the moment; user-space access is
+ yet to be worked out for 0.4.
+EXT2_ECOMPR_FL E Compression error associated with this file
+EXT2_BTREE_FL I B-tree indexed directory (seemingly not yet implemented)
+EXT2_RESERVED_FL - (reserved for ext2 lib)
+
+See the chattr(1) man page for more verbose descriptions of the
+non-e2compr flags.
+
+
+ 4. Ioctls available
+ ~~~~~~~~~~~~~~~~~~~
+
+ In brief
+ ~~~~~~~~
+
+Action Ioctl To kernel From kernel
+~~~~~~ ~~~~~ ~~~~~~~~~ ~~~~~~~~~~~
+Get cluster bit EXT2_IOC_GETCLUSTERBIT Cluster num 1 or 0 (cmp,uncmp)
+Recognize compressed Cluster num -
+ EXT2_IOC_RECOGNIZE_COMPRESSED
+Get algorithm EXT2_IOC_GETCOMPRMETHOD - Id
+Set algorithm EXT2_IOC_SETCOMPRMETHOD Id -
+Get cluster size EXT2_IOC_GETCLUSTERSIZE - Cluster size
+Set cluster size EXT2_IOC_SETCLUSTERSIZE Cluster size -
+Get attributes EXT2_IOC_GETFLAGS - Flags
+Set attributes EXT2_IOC_SETFLAGS Flags -
+Get block size FIGETBSZ - Block size
+
+#include <linux/ext2_fs.h> to use any of these ioctls, except FIGETBSZ,
+which requires <linux/fs.h>.
+
+To find out what errors can be returned by these ioctls, read
+fs/ext2/ioctl.c (for all of the above ioctls except FIGETBSZ) or
+fs/ioctl.c (for FIGETBSZ).
+
+
+ Setting or testing a cluster bit
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+[Note: user-space access to compression details are yet to be worked out,
+so this section may not be accurate.]
+
+EXT2_IOC_GETCLUSTERBIT sets *arg to 1 if the specified cluster (0 for first
+cluster, 1 for second, etc.) is stored in compressed form.
+
+To make the kernel consider a certain cluster to be compressed (after
+you've done the compression yourself, in user space), use
+EXT2_IOC_RECOGNIZE_COMPRESSED. This ioctl checks the validity of the
+cluster's data, then marks it as compressed (if valid). This ioctl
+requires special priveleges, because if the compressed data is not
+valid then it may be possible to crash the system (due to buffer
+overruns).
+
+
+ Setting or getting the compression algorithm
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+EXT2_IOC_SETCOMPRMETHOD sets the default compression method (stored in
+the inode). This is the compression method that is used for future
+writes. In the current version of e2compr [accurate at 0.4.36], this
+does not cause a change to how
+existing clusters are stored, except when the compression method
+changes from `none' to something else, in which case the kernel
+attempts to compress ,all currently-uncompressed clusters` using the
+new algorithm. It is an error to use this ioctl on a file without the
+compressed attribute.
+
+EXT2_IOC_GETCOMPRMETHOD sets *arg to the current compression method.
+
+In either case, Id is one of: EXT2_DEFER_METH, EXT2_LZV1_METH,
+EXT2_AUTO_METH, EXT2_NEVER_METH, EXT2_BZIP2_METH, EXT2_LZO1X_1_METH,
+EXT2_LZRW3A_METH (deprecated), EXT2_GZIP1_METH, EXT2_GZIP2_METH, ...,
+EXT2_GZIP9_METH.
+
+
+ Setting or getting the cluster size
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+EXT2_IOC_SETCLUSTERSIZE sets the cluster size to the value of *arg.
+This ioctl fails if there are already compressed clusters in the file
+(as determined by checking the EXT2_COMPRBLK_FL attribute).
+
+EXT2_IOC_GETCLUSTERSIZE sets *arg to the current cluster size.
+Surprisingly, this ioctl succeeds even if the EXT2_COMPR_FL attribute
+is clear. (Maybe this will change in future, since the result is
+meaningless.)
+
+In either case, the size is one of {4, 8, 16, 32}, and represents the
+number of blocks per cluster. To convert to or from a number of
+bytes, use the FIGETBSZ ioctl.
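+
+A user-space sketch combining the two (error handling omitted; the
+argument types follow the _IOR('c', 0, long) definition in
+ext2_fs_c.h):
+
+    #include <sys/ioctl.h>
+    #include <linux/ext2_fs.h>  /* EXT2_IOC_GETCLUSTERSIZE */
+    #include <linux/fs.h>       /* FIGETBSZ */
+
+    long cluster_bytes(int fd)
+    {
+        long nblocks;   /* blocks per cluster: 4, 8, 16 or 32 */
+        int blksize;    /* block size in bytes: 1024, 2048 or 4096 */
+
+        ioctl(fd, EXT2_IOC_GETCLUSTERSIZE, &nblocks);
+        ioctl(fd, FIGETBSZ, &blksize);
+        return nblocks * (long)blksize;
+    }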
+
+
+ Setting or getting the ext2 file attributes
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+These ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) are not
+e2compr-specific, but some attributes are e2compr-specific.
+
+*arg consists of the set of attributes for that file OR'ed together.
+E.g. a value of (EXT2_COMPR_FL | EXT2_COMPRBLK_FL | EXT2_NODUMP_FL)
+for a regular file means that the file contains one or more compressed
+clusters, and should not be backed up when using dump(8).
+
+See section 3 for a description of the various attributes.
+
+Note that although the compression method and cluster size are
+physically stored in the flags field on disk this information is
+masked out (i.e. set to zero) for GETFLAGS if the kernel has e2compr compiled in.
+If the kernel does not have e2compr compiled in, then this information
+is not masked out. See section 5 for how the cluster size and
+compression method is stored if you wish to work with ,kernels without
+e2compr`.
+
+
+ Getting the block size
+ ~~~~~~~~~~~~~~~~~~~~~~
+
+This ioctl (FIGETBSZ) is not e2compr-specific, but is useful in
+interpreting a cluster size (which is specified as a number of blocks
+rather than bytes or kilobytes).
+
+*arg is set to the block size (in bytes) of the file. For ext2 files,
+this is one of {1024,2048,4096}. It is the same value for all files
+on the same filesystem.
+
+You must #include <linux/fs.h> to use this ioctl (unlike the rest of
+the ioctls listed here, which require <linux/ext2_fs.h>).
+
+
+ 5. File format
+ ~~~~~~~~~~~~~~
+
+A note on byte ordering. All current versions of the kernel and
+e2compr write to disk in little-endian format, so the 16-bit number
+`0x8EC7' would be written as a 0xC7 byte followed by a 0x8E byte.
+Unless you want to know the most general rule for byte ordering, you
+can skip to the `Inode' heading.
+
+In kernel 2.0, the ext2 fs is written to disk in the native byte
+ordering. On x86 machines, this means little endian; most other
+architectures are big-endian (so the same 16-bit number would be
+written as an 0x8E byte followed by 0xC7).
+
+On kernel 2.1 and later, the ext2 fs (including e2compr data) is
+written in little-endian order regardless of the host architecture.
+
+
+ 5.1. Inode
+ ~~~~~~~~~~
+
+fs/inode.c controls the reading and writing of inode information
+to/from disk; consult this file (functions ext2_read_inode(),
+ext2_update_inode() and/or ext2_write_inode()) for any detail omitted
+from this section.
+
+The physical structure of an inode is struct ext2_inode (defined in
+include/linux/ext2_fs.h).
+
+
+The i_flags member contains the ext2 file attributes, as well as
+cluster size and compression method.
+
+The normal flags are stored in the low 23 bits. Only the low 12 bits
+are defined at present, including 4 flags introduced by the e2compr
+patch. See ext2_fs.h for the flag meanings (search for
+EXT2_SECRM_FL).
+
+Bits 23 through 25 hold the cluster size, or more precisely the log2 of
+the number of filesystem blocks per cluster (excluding the first cluster;
+see ext2_first_cluster_nblocks in include/linux/ext2_fs_c.h).
+
+Bits 26 through 30 store the compression method. See the definitions
+for EXT2_LZV1_METH etc. in ext2_fs_c.h for the interpretation.
+
+Bit 31 is reserved for ext2 lib (which means that programs like e2fsck
+store things there during its operation but it isn't used by the
+kernel).
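+
+As an illustration of the layout just described (these macro names
+are hypothetical, not taken from the patch):
+
+    #define CLU_LOG2_NBLOCKS(f)  (((f) >> 23) & 0x7)   /* bits 23-25 */
+    #define COMPR_METHOD(f)      (((f) >> 26) & 0x1f)  /* bits 26-30 */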
+
+
+ Data blocks
+ ~~~~~~~~~~~
+
+Uncompressed clusters are stored just as they would be without
+e2compr. So if there are no compressed clusters then the file
+is stored identically to any other file.
+
+
+If a cluster is compressed, then the first non-hole block starts with
+a `cluster head', as defined in struct ext2_cluster_head in ext2_fs.h.
+
+The magic number (i.e. the value of the `magic' field) is 0x9ec7
+(EXT2_COMPRESS_MAGIC_04X). `method' holds one of EXT2_LZV1_METH and
+the like. The `holemap' array describes where the uncompressed data
+goes. (Recall that when we compress a cluster, we only compress the
+data from non-hole blocks, so we need to know where the holes and
+non-holes go when we decompress the data.) A `0' bit means a hole
+and a `1' bit means a data block; bit 0 refers to the first block,
+bit 1 the second, and so on.
+
+
+The block positions within the file where the compressed data is held
+are a subset of where the uncompressed data would be held. Further, if the
+uncompressed data occupies u non-hole blocks and this compresses to c
+blocks, then the compressed data occupies the first c non-hole blocks
+of the file (and the remainder are freed).
+
+[This paragraph is an expansion of the preceding: if you understood
+the preceding paragraph then skip this one.] Consider an array
+cblock[] where cblock[0] holds the block number on disk (or 0 to
+represent a hole) of the first block of a certain cluster of a file,
+cblock[1] the second, and so on. (If you are familiar with the bmap
+array or the format of first-level indirect blocks, then cblock[] is a
+section of that array.) Suppose that the cluster size of this file is
+16 blocks. Suppose too that, when uncompressed, blocks 0, 1, 5 and 6
+of the cluster are holes but the other 12 blocks (2,3,4,7,8,...,15)
+contain data. (Thus the bitmap is 0x0000ff9c.) Now if we compress this
+cluster to just 5 blocks, then cblock[0], [1], [5] and [6] will continue
+to be holes, ,the positions of the compressed data blocks` are stored in
+cblock[2], cblock[3], [4], [7] and [8], the blocks referenced by
+cblock[9] through cblock[15] are freed, and cblock[9] through cblock[15]
+are set to zero.
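+
+Schematically (same example; bN denotes the disk address of block N
+of the cluster, 0 a hole):
+
+    before: cblock[] = { 0, 0, b2, b3, b4, 0, 0, b7, b8, b9, ..., b15 }
+    after:  cblock[] = { 0, 0, b2, b3, b4, 0, 0, b7, b8, 0, ..., 0 }
+
+After compression, b2, b3, b4, b7 and b8 hold the cluster head and
+the compressed data; the blocks formerly at cblock[9] through
+cblock[15] have been freed.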
+
+
+ 6. What's coded where
+ ~~~~~~~~~~~~~~~~~~~~~
+
+File names in this section are relative to linux/fs/ext2, except for
+ext2_fs.h which is in linux/include/linux.
+
+Most of the action happens in compress.c; though note that a few
+small, commonly-used routines are written as inline functions in
+ext2_fs.h.
+
+ext2_readpage() and ext2_mmap() are in file.c. ext2_file_write() is
+also there.
+
+Routines to read/write the inode from/to disk are in inode.c.
+
+super.c contains some e2compr initialisation code (such as allocating
+the e2compr work area).
+
+All ioctl handling is in ioctl.c.
+
+acl.c is where we deny open() access in a couple of situations (if the
+EXT2_NOCOMPR_FL is set and another process has the file open; and we
+deny write access to a file with EXT2_ECOMPR_FL set).
+
+ialloc.c contains code in ext2_new_inode() for newly-created files to
+inherit compression attributes from the directory in which they're
+created.
+
+truncate.c handles truncation, i.e. zeroing any part of the cluster
+bitmap that's been truncated, and decompressing the final cluster (but
+marking dirty so that we try to recompress it on file close) if the
+new size is part-way through a compressed cluster, so that zeroing
+over the truncated data works.
+
+linux/include/linux/ext2_fs_i.h has the definition of the
+ext2-specific parts of the in-memory inode. (The on-disk inode is
+defined in ext2_fs.h.)
+
+linux/mm/filemap.c is also interesting, though there's no
+e2compr-specific code there. Similarly linux/include/linux/mm.h and
+linux/include/linux/fs.h.
+
+generic_readpage() is in linux/fs/buffer.c. Also all buffer handling.
+
+
+The cleanup scheme
+~~~~~~~~~~~~~~~~~~
+
+inode->u.ext2_i.i_compr_flags has only a single bit defined:
+EXT2_CLEANUP_FL. This bit gets set to 1 to indicate that
+ext2_cleanup_compressed_inode() needs to be called.
+
+There is a related flag stored on disk as well as in memory:
+EXT2_DIRTY_FL of i_flags. If ext2_cleanup_compressed_inode() couldn't
+finish its job (e.g. due to I/O error) then it clears EXT2_CLEANUP_FL
+of i_compr_flags, but leaves EXT2_DIRTY_FL high.
+
+In ext2_read_inode(), if EXT2_DIRTY_FL is high then EXT2_CLEANUP_FL is
+raised, in the hope that ,whatever was preventing
+ext2_cleanup_compressed_inode() from finishing` is now past.
+
+Except for ext2_read_inode() as noted above, everything that raises
+EXT2_CLEANUP_FL (i.e. ext2_write_file(), ext2_ioctl() and
+ext2_truncate()) also raises EXT2_DIRTY_FL.
+
+Nothing lowers either EXT2_CLEANUP_FL or EXT2_DIRTY_FL except
+ext2_cleanup_compressed_inode() (and one or both of new_inode and
+delete_inode routines).
+
+
+One feels that at least one of these cleanup flags ought to
+disappear. The main use of the persistent EXT2_DIRTY_FL is where the
+user does `chattr -c' in order to decompress the file, but there isn't
+enough space on the device to do this. We can get rid of this problem
+by having ext2_ioctl() call ext2_cleanup_compressed_inode()
+try to
+
+
+Notes on a few variables
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Don't confuse the inode->i_dirt flag with (inode->u.ext2_i.i_flags &
+EXT2_DIRTY_FL). See section `The cleanup scheme' above for a
+description of EXT2_DIRTY_FL.
+
+
+inode->u.ext2_i.i_clu_nblocks,
+inode->u.ext2_i.i_log2_clu_nblocks:
+
+i_clu_nblocks is always equal to ,1 << i_log2_clu_nblocks` (except during a
+couple of cycles while they're being changed; I haven't consciously
+tried to avoid problems for SMP machines in this respect).
+
+i_clu_nblocks is the number of blocks per cluster for this inode.
+
+Old information: these variables were previously called
+`i_cluster_bits' and `i_cluster_size'. They were in an array:
+
+inode->u.ext2_i.i_cluster_bits[2],
+inode->u.ext2_i.i_cluster_size[2]:
+
+I believe the reason these were declared as an array was for the case
+where someone changes the cluster size of a file that was already
+compressed. (Reason for this belief: All readers of these fields use
+[0]. On creation (ialloc), read_inode, and `chattr +c' (where
+previously uncompressed), both [0] and [1] are updated. On change
+(IOC_SET_CLUSTERSIZE), only [0] is updated.) Since ,changing cluster
+size of an already-compressed file` isn't implemented, I've renamed
+them and made them scalars rather than arrays.
+
+
+inode->u.ext2_i.i_flags: When the e2compr patch is applied, this
+variable only holds the low 24 bits of the on-disk i_flags field.
+(Without the e2compr patch applied, all 32 bits are available. An
+interesting side effect of this is that user programs can access the
+compression algorithm and cluster size on kernels without e2compr
+patch by using the EXT2_IOC_GETFLAGS, EXT2_IOC_SETFLAGS ioctls.)
+
+
+inode->u.ext2_i.i_compr_method: Holds the compression method
+identifier. Starting from e2compr-0.4.0, this is different from an
+algorithm identifier: an example of a method is gzip9; the
+corresponding algorithm is gzip. See compress.c for where
+ext2_method_table and ext2_algorithm_table are defined. ext2_fs.h has
+some enumerations for addressing these tables (search for
+`EXT2_NONE_METH' and `EXT2_NONE_ALG').
--- linux-3.2-rc5/fs/Kconfig 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/Kconfig 2011-12-13 14:22:47.826975380 +0100
@@ -7,6 +7,126 @@ menu "File systems"
if BLOCK
source "fs/ext2/Kconfig"
+
+config EXT2_COMPRESS
+ bool "Ext2 file compression (DANGEROUS)"
+ depends on EXT2_FS && EXPERIMENTAL
+ select CRYPTO
+ select CRYPTO_ALGAPI
+ select CRYPTO_DEFLATE
+ select ZLIB_INFLATE
+ select ZLIB_DEFLATE
+ help
+ Ext2 file compression allows transparent compression of files on an
+ ext2 filesystem. Transparent compression means that files are
+ stored on the disk in a compressed format but they are automatically
+ decompressed as they are read in and compressed when written out.
+ The user is in control of how and which files are compressed, using
+ the `chattr' utility (see chattr(1)). For the sake of safety,
+ administrative data (superblock, inodes, directories, etc.) are not
+ compressed.
+
+ Compression is very useful if you're short on disk space, and
+ provides a better option than having lots of .gz files around.
+ For more information, see <http://e2compr.sourceforge.net/>.
+
+ You _need_ to have the special e2compr version of e2fsck to be able
+ to make use of this.
+
+ If you say Y, you will be asked which compression algorithms you wish
+ to include. Gzip is a good all-round algorithm, as its 1..9 parameter
+ allows a good range of speed/compression trade-off. Other noteworthy
+ algorithms are LZV, which caters better to the faster/less compressing
+ end of the scale, and bzip, which caters slightly better to the more
+ compressing but slower end of the scale.
+
+ Ext2 compression is still experimental, so unless you know you need
+ it, you'd better say N.
+
+menu "Ext2 file compression options"
+ depends on EXT2_COMPRESS
+
+choice
+ #depends on EXT2_DEFAULT_COMPR_METHOD_GZIP
+ prompt "Gzip parameter for default compression method"
+ default EXT2_DEFAULT_COMPR_METHOD_GZIP8
+ help
+ You have selected `gzip' as your default compression algorithm, but
+ I need to know whether to use `gzip -1', `gzip -9', or somewhere
+ in between. gzip1 is the least compressing but fastest; gzip9 is the
+ most compressing and slowest; and the numbers in between have
+ characteristics in between (though not on a linear scale).
+ If unsure, say `8'.
+
+config EXT2_DEFAULT_COMPR_METHOD_GZIP1
+ bool "1"
+config EXT2_DEFAULT_COMPR_METHOD_GZIP2
+ bool "2"
+config EXT2_DEFAULT_COMPR_METHOD_GZIP3
+ bool "3"
+config EXT2_DEFAULT_COMPR_METHOD_GZIP4
+ bool "4"
+config EXT2_DEFAULT_COMPR_METHOD_GZIP5
+ bool "5"
+config EXT2_DEFAULT_COMPR_METHOD_GZIP6
+ bool "6"
+config EXT2_DEFAULT_COMPR_METHOD_GZIP7
+ bool "7"
+config EXT2_DEFAULT_COMPR_METHOD_GZIP8
+ bool "8"
+config EXT2_DEFAULT_COMPR_METHOD_GZIP9
+ bool "9"
+
+endchoice
+
+config GZ_HACK
+ bool "Exclude .gz files from automatic compression"
+ depends on EXT2_COMPRESS
+ default y
+ help
+ If you say Y here, then files created with names ending in `.gz' or
+ `.?gz' or `.bz2' don't inherit the `c' ("compress") attribute from
+ their parent directory. (However, you can still do `chattr +c FILE'
+ if you want to try to compress it anyway.) This means that you
+ don't waste CPU time trying to compress a file that probably can't
+ be compressed. See fs/ext2/namei.c if you want to add other rules.
+ If you have any aesthetic sensibilities then you will say N here
+ and try to implement something better. Most people will say Y here.
+
+
+choice
+ depends on EXT2_COMPRESS
+ prompt "Default cluster size (in blocks, usually 1KB each)"
+ default EXT2_DEFAULT_CLUSTER_BITS_5
+ help
+ To make random access to compressed files reasonably fast the files
+ are compressed in clusters. By default, the clusters will be of the
+ size defined here but there is a modified version of the chattr
+ utility that can set the cluster size for each file independently.
+ Large clusters usually result in better compression at the cost of
+ being slower.
+
+ Note that the answer to this question is specified in filesystem
+ blocks rather than in kilobytes, though most filesystems have 1KB
+ blocks anyway. (If you have a filesystem with large blocks then
+ you should know it, but if you want to check then "tune2fs -l
+ /dev/xxx | grep size".) The default is 32 blocks which is the
+ slowest setting but gives the best compression.
+
+config EXT2_DEFAULT_CLUSTER_BITS_2
+ bool "4"
+config EXT2_DEFAULT_CLUSTER_BITS_3
+ bool "8"
+config EXT2_DEFAULT_CLUSTER_BITS_4
+ bool "16"
+config EXT2_DEFAULT_CLUSTER_BITS_5
+ bool "32"
+
+endchoice
+
+endmenu
+
+
source "fs/ext3/Kconfig"
source "fs/ext4/Kconfig"
--- linux-3.2-rc5/include/linux/ext2_fs_c.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/include/linux/ext2_fs_c.h 2011-12-13 14:22:47.830975497 +0100
@@ -0,0 +1,498 @@
+/*
+ * Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch
+ * (transparent compression code)
+ * Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr) - Denis Richard (denis.richard@sxb.bsf.alcatel.fr)
+ * Adapted from patch e2compr-0.4.39-patch-2.2.18 .
+ */
+
+#ifndef EXT2_FS_C_H
+#define EXT2_FS_C_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include <linux/ext2_fs.h>
+#include "../../fs/ext2/ext2.h"
+
+/* EXT2_COMPR_DEBUG enables:
+ * - all assertions
+ * - adler checksum checking
+ */
+//#undef EXT2_COMPR_DEBUG
+#define EXT2_COMPR_DEBUG
+
+#ifdef EXT2_COMPR_DEBUG
+# define assert(expr) \
+ if(unlikely(!(expr))) { \
+ printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
+#expr, __FILE__, __func__, __LINE__); \
+ }
+#else
+# define assert(expr) do {} while (0)
+#endif
+
+
+/* Prove get_cpu and put_cpu correctness by calling might_sleep() or maybe schedule();
+ this will check whether we are atomic. */
+#ifdef EXT2_COMPR_DEBUG
+#define CHECK_NOT_ATOMIC assert(! in_atomic());//might_sleep();
+#else
+#define CHECK_NOT_ATOMIC
+#endif
+
+
+#undef EXT2_COMPR_REPORT
+//#define EXT2_COMPR_REPORT
+//#define EXT2_COMPR_REPORT_VERBOSE
+//#define EXT2_COMPR_REPORT_PUT
+//# define EXT2_COMPR_REPORT_FILEOPEN
+//#define EXT2_COMPR_REPORT_MUTEX
+
+#ifdef EXT2_COMPR_REPORT
+//# define EXT2_COMPR_REPORT_PUT
+//# define EXT2_COMPR_REPORT_WA
+//# define EXT2_COMPR_REPORT_MUTEX
+//# define EXT2_COMPR_REPORT_ALLOC /* disk allocation etc. */
+//# define EXT2_COMPR_REPORT_ALGORITHMS /* Compression algorithms */
+//# define EXT2_COMPR_REPORT_VERBOSE /* Various things I don't think
+// useful at the moment. */
+//#define EXT2_COMPR_REPORT_VERBOSE_INODE
+#endif
+
+
+#ifdef EXT2_COMPR_DEBUG
+#define E2COMPR_VERSION "ext2-compression: e2c-0.4.58-smp-debug (26 August 2011) for kernel 3.1"
+#else
+#define E2COMPR_VERSION "ext2-compression: e2c-0.4.58-smp-release (26 August 2011) for kernel 3.1"
+#endif
+
+#define EXT2_IOC_GETCLUSTERSIZE _IOR('c', 0, long)
+#define EXT2_IOC_SETCLUSTERSIZE _IOW('c', 0, long)
+#define EXT2_IOC_GETCOMPRMETHOD _IOR('c', 1, long)
+#define EXT2_IOC_SETCOMPRMETHOD _IOW('c', 1, long)
+#define EXT2_IOC_GETFIRSTCLUSTERSIZE _IOR('c', 2, long)
+#define EXT2_IOC_RECOGNIZE_COMPRESSED _IOW('c', 2, long)
+#define EXT2_IOC_GETCLUSTERBIT _IOR('c', 3, long)
+#define EXT2_IOC_GETCOMPRRATIO _IOR('c', 4, long)
+/* Don't use _IOW('c', {5,6}, long), as these are used by old
+ e2compress binaries as SETCLUSTERBIT and CLRCLUSTERBIT
+ respectively. */
+
+/* EXT2_xxxx_ALG is an index into ext2_algorithm_table[] defined in
+ fs/ext2/compress.c. */
+/* N.B. Don't change these without also changing the table in
+ compress.c. Be careful not to break binary compatibility.
+ (EXT2_NONE_ALG and EXT2_UNDEF_ALG are safe from binary
+ compatibility problems, though, so they can safely be renumbered --
+ and indeed probably should be if you do add another algorithm.) */
+#define EXT2_LZV1_ALG 0
+#define EXT2_LZRW3A_ALG 1
+#define EXT2_GZIP_ALG 2
+#define EXT2_BZIP2_ALG 3
+#define EXT2_LZO_ALG 4
+#define EXT2_NONE_ALG 5
+#define EXT2_UNDEF_ALG 6
+#define EXT2_N_ALGORITHMS 5 /* Count of "real" algorithms. Excludes
+ `none' and `undef'. */
+
+/* EXT2_xxxx_METH is an index into ext2_method_table[] defined in
+ fs/ext2/compress.c. */
+/* N.B. Don't change these without also changing the table in
+ compress.c. */
+#define EXT2_LZV1_METH 0
+#define EXT2_AUTO_METH 1
+#define EXT2_DEFER_METH 2
+#define EXT2_NEVER_METH 3
+#define EXT2_BZIP2_METH 4
+#define EXT2_LZRW3A_METH 8
+#define EXT2_LZO1X_1_METH 10
+#define EXT2_GZIP_1_METH 16
+#define EXT2_GZIP_2_METH 17
+#define EXT2_GZIP_3_METH 18
+#define EXT2_GZIP_4_METH 19
+#define EXT2_GZIP_5_METH 20
+#define EXT2_GZIP_6_METH 21
+#define EXT2_GZIP_7_METH 22
+#define EXT2_GZIP_8_METH 23
+#define EXT2_GZIP_9_METH 24
+
+#define EXT2_N_METHODS 32 /* Don't change this unless you know what
+ you're doing. In particular, it's tied
+ to the width of the algorithm field
+ in i_flags.*/
+
+/* Note: EXT2_N_ALGORITHMS can't be increased beyond 16 without
+ changing the width of the s_algorithms_used field in the in-memory
+ superblock. The on-disk s_algorithms_used field is 32 bits long.
+ (This is in a state of flux. Currently (1998-02-05) there is no
+ distinction: we always use the s_es copy.) */
+
+
+#define EXT2_MAX_CLUSTER_BYTES (32*1024)
+#define EXT2_LOG2_MAX_CLUSTER_BYTES (5 + 10)
+
+#define EXT2_COMPRESS_MAGIC_04X 0x9ec7
+#define EXT2_MAX_CLUSTER_BLOCKS 32
+#define EXT2_MAX_CLUSTER_PAGES (EXT2_MAX_CLUSTER_BYTES >> PAGE_CACHE_SHIFT)
+#define EXT2_ECOMPR EIO
+/* A cluster is considered compressed iff the block number for the
+ last block of that cluster is EXT2_COMPRESSED_BLKADDR. If this
+ changes then check if there's anywhere that needs a cpu_to_le32()
+ conversion. */
+#define EXT2_COMPRESSED_BLKADDR 0xffffffff
+
+/* I like these names better. */
+#define EXT2_MAX_CLU_NBYTES EXT2_MAX_CLUSTER_BYTES
+#define EXT2_LOG2_MAX_CLU_NBYTES EXT2_LOG2_MAX_CLUSTER_BYTES
+#define EXT2_MAX_CLU_NBLOCKS EXT2_MAX_CLUSTER_BLOCKS
+
+
+#ifndef __KERNEL__
+
+/* Cluster head on disk, for e2compr versions before 0.4.0. I'm
+ leaving this here so that I may make e2compress able to read
+ old-style e2compr files. */
+struct ext2_cluster_head_03x {
+ __u16 magic; /* == EXT2_COMPRESS_MAGIC_03X */
+ __u16 len; /* size of uncompressed data */
+ __u16 compr_len; /* size of compressed data */
+ __u8 method; /* compress method */
+ __u8 reserved_0;
+ __u32 bitmap; /* block bitmap */
+ __u32 reserved_2; /* 0 or adler32 checksum of
+ _compressed_ data */
+};
+# define EXT2_COMPRESS_MAGIC_03X 0x8ec7 /* Head magic number
+ for e2compr versions
+ before 0.4.0. */
+#endif /* !__KERNEL__ */
+
+
+#ifdef __KERNEL__
+# ifdef CONFIG_EXT2_COMPRESS
+
+//mw
+#define CONFIG_EXT2_HAVE_GZIP
+
+/* If defined, compress each cluster as soon as we get to the end of a
+ whole cluster, when writing. (If undefined, we wait until
+ ext2_release_file() or the like.) */
+#define EXT2_COMPRESS_WHEN_CLU
+
+# ifdef CONFIG_EXT2_DEFAULT_COMPR_METHOD_DEFER
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_DEFER_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_BZIP2)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_BZIP2_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZO)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZO1X_1_METH
+# ifndef CONFIG_EXT2_HAVE_LZO
+# error "Default algorithm (lzo) is not compiled in."
+# endif
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZV1)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZV1_METH
+# ifndef CONFIG_EXT2_HAVE_LZV1
+# error "Default algorithm (lzv1) is not compiled in."
+# endif
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZRW3A)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZRW3A_METH
+# ifndef CONFIG_EXT2_HAVE_LZRW3A
+# error "Default algorithm (lzrw3a) is not compiled in."
+# endif
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP1)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_1_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP2)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_2_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP3)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_3_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP4)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_4_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP5)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_5_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP6)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_6_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP7)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_7_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP8)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_8_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP9)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_9_METH
+# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_BZIP2)
+# define EXT2_DEFAULT_COMPR_METHOD EXT2_BZIP2_METH
+# ifndef CONFIG_EXT2_HAVE_BZIP2
+# error "Default algorithm (bzip2) is not compiled in."
+# endif
+# else
+# error "No default compression algorithm."
+# endif
+# if EXT2_DEFAULT_COMPR_METHOD >= EXT2_GZIP_1_METH && EXT2_DEFAULT_COMPR_METHOD <= EXT2_GZIP_9_METH
+# ifndef CONFIG_EXT2_HAVE_GZIP
+# error "Default algorithm (gzip) is not compiled in."
+# endif
+# endif
+
+# if defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_2)
+# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 2
+# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_3)
+# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 3
+# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_4)
+# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 4
+# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_5)
+# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 5
+# else
+# error "No default cluster size."
+# endif
+
+# define EXT2_DEFAULT_CLU_NBLOCKS (1 << EXT2_DEFAULT_LOG2_CLU_NBLOCKS)
+
+# if (EXT2_LZV1_ALG != 0) || (EXT2_BZIP2_ALG != 3) || (EXT2_LZO_ALG != 4) || (EXT2_N_ALGORITHMS != 5)
+# error "this code needs changing; but then, you shouldn't be messing with algorithm ids anyway unless you are very careful to protect disk format compatibility"
+# endif
+# ifdef CONFIG_EXT2_HAVE_LZV1
+# define _ext2_lzv1_builtin (1 << EXT2_LZV1_ALG)
+# else
+# define _ext2_lzv1_builtin 0
+# endif
+# ifdef CONFIG_EXT2_HAVE_LZRW3A
+# define _ext2_lzrw3a_builtin (1 << EXT2_LZRW3A_ALG)
+# else
+# define _ext2_lzrw3a_builtin 0
+# endif
+# ifdef CONFIG_EXT2_HAVE_GZIP
+# define _ext2_gzip_builtin (1 << EXT2_GZIP_ALG)
+# else
+# define _ext2_gzip_builtin 0
+# endif
+# ifdef CONFIG_EXT2_HAVE_BZIP2
+# define _ext2_bzip2_builtin (1 << EXT2_BZIP2_ALG)
+# else
+# define _ext2_bzip2_builtin 0
+# endif
+# ifdef CONFIG_EXT2_HAVE_LZO
+# define _ext2_lzo_builtin (1 << EXT2_LZO_ALG)
+# else
+# define _ext2_lzo_builtin 0
+# endif
+
+# ifdef CONFIG_EXT2_HAVE_LZV1_MODULE
+# define _ext2_lzv1_module (1 << EXT2_LZV1_ALG)
+# else
+# define _ext2_lzv1_module 0
+# endif
+# ifdef CONFIG_EXT2_HAVE_LZRW3A_MODULE
+# define _ext2_lzrw3a_module (1 << EXT2_LZRW3A_ALG)
+# else
+# define _ext2_lzrw3a_module 0
+# endif
+# ifdef CONFIG_EXT2_HAVE_GZIP_MODULE
+# define _ext2_gzip_module (1 << EXT2_GZIP_ALG)
+# else
+# define _ext2_gzip_module 0
+# endif
+# ifdef CONFIG_EXT2_HAVE_BZIP2_MODULE
+# define _ext2_bzip2_module (1 << EXT2_BZIP2_ALG)
+# else
+# define _ext2_bzip2_module 0
+# endif
+# ifdef CONFIG_EXT2_HAVE_LZO_MODULE
+# define _ext2_lzo_module (1 << EXT2_LZO_ALG)
+# else
+# define _ext2_lzo_module 0
+# endif
+
+# define EXT2_ALGORITHMS_MODULE (_ext2_lzv1_module | _ext2_lzrw3a_module | _ext2_gzip_module | _ext2_bzip2_module | _ext2_lzo_module)
+# define EXT2_ALGORITHMS_BUILTIN (_ext2_lzv1_builtin | _ext2_lzrw3a_builtin | _ext2_gzip_builtin | _ext2_bzip2_builtin | _ext2_lzo_builtin)
+
+# if EXT2_ALGORITHMS_MODULE & EXT2_ALGORITHMS_BUILTIN
+# error "Arithmetic error? Some algorithm appears to be both built-in and a module."
+# endif
+
+/* EXT2_ALGORITHMS_SUPP is what we test when mounting a filesystem.
+ See fs/ext2/super.c. */
+# define EXT2_ALGORITHMS_SUPP (EXT2_ALGORITHMS_MODULE | EXT2_ALGORITHMS_BUILTIN)
+# if EXT2_ALGORITHMS_SUPP == 0
+# error "You must select at least one compression algorithm."
+# endif
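+
+/* Example (illustrative; assumes the algorithm ids implied by the check
+   above and the table ordering in compress.c, i.e. EXT2_GZIP_ALG == 2
+   and EXT2_LZO_ALG == 4): with gzip built in and lzo as a module,
+   EXT2_ALGORITHMS_BUILTIN == 0x04, EXT2_ALGORITHMS_MODULE == 0x10, so
+   EXT2_ALGORITHMS_SUPP == 0x14; super.c tests a filesystem's
+   s_algorithm_usage_bitmap against this mask at mount time. */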
+
+/* Cluster head on disk. Little-endian. */
+struct ext2_cluster_head {
+ __u16 magic; /* == EXT2_COMPRESS_MAGIC_04X. */
+ __u8 method; /* compression method id. */
+ __u8 holemap_nbytes; /* length of holemap[] array */
+ __u32 checksum; /* adler32 checksum. Checksum covers all fields
+ below this one, and the compressed data. */
+ __u32 ulen; /* size of uncompressed data */
+ __u32 clen; /* size of compressed data (excluding cluster head) */
+ __u8 holemap[0]; /* bitmap describing where to put holes. */
+};
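+
+/* On-disk layout sketch, derived from the field order above (the code
+   elsewhere asserts sizeof(struct ext2_cluster_head) == 16):
+     offset  0: magic (2 bytes)      offset  2: method (1 byte)
+     offset  3: holemap_nbytes (1)   offset  4: checksum (4)
+     offset  8: ulen (4)             offset 12: clen (4)
+     offset 16: holemap[], immediately followed by the compressed data.
+   The checksum thus covers everything from byte 8 onward. */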
+
+
+struct ext2_wa_S {
+ __u8 u[EXT2_MAX_CLUSTER_BYTES]; /* Uncompressed data. */
+ __u8 c[EXT2_MAX_CLUSTER_BYTES]; /* Compressed data. */
+ __u8 heap[1]; /* Heap: working space for de/compression routines. */
+};
+
+# define EXT2_CLEANUP_FL 0x40 /* See Readme.e2compr */
+# define EXT2_OSYNC_INODE 0x20 /* sync of inode running */
+# define ROUNDUP_DIV(_n, _d) ((_n) ? 1 + (((_n) - 1) / (_d)) : 0)
+# define ROUNDUP_RSHIFT(_n, _b) ((_n) ? 1 + (((_n) - 1) >> (_b)) : 0)
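+
+/* Example (illustrative): ROUNDUP_DIV(10, 4) == 3 and
+   ROUNDUP_RSHIFT(10, 2) == 3; both round up and map 0 to 0.  E.g.
+   ROUNDUP_RSHIFT(inode->i_size, blocksize_bits) is the number of
+   blocks needed to hold i_size bytes. */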
+
+# if defined(EXT2_NDIR_BLOCKS) && (EXT2_NDIR_BLOCKS != 12)
+# error "e2compr currently assumes that EXT2_NDIR_BLOCKS is 12."
+/* If EXT2_NDIR_BLOCKS changes then change the definitions of
+ ext2_first_cluster_nblocks() and friends, and search the patch for
+ anywhere where 12 is hard-coded. (At the time of writing, it's
+ only hard-coded in ext2_first_cluster_nblocks().) What we want to
+ achieve is for clusters not to straddle address blocks. Apart from
+ performance, some code in compress.c (search for `straddle')
+ assumes this. */
+# endif
+
+# include <linux/fs.h>
+
+# define EXT2_ALG_INIT_COMPRESS 1
+# define EXT2_ALG_INIT_DECOMPRESS 2
+
+extern int ext2_get_cluster_pages (struct inode*, u32, struct page**, struct page *, int);
+extern int ext2_get_cluster_extra_pages (struct inode*, u32, struct page**, struct page**);
+extern int ext2_kmap_cluster_pages (struct page *, struct page**, struct page**);
+extern int ext2_kunmap_cluster_pages (struct page *, struct page**, struct page**);
+extern int ext2_get_cluster_blocks (struct inode*, u32, struct buffer_head**, struct page**, struct page**, int);
+extern int ext2_decompress_cluster (struct inode*, u32);
+extern int ext2_decompress_pages(struct inode*, u32, struct page**);
+extern int ext2_compress_cluster (struct inode*, u32);
+extern int ext2_decompress_inode (struct inode*);
+extern int ext2_cleanup_compressed_inode (struct inode*);
+extern void ext2_update_comprblk (struct inode *);
+extern int ext2_get_dcount(struct inode *inode);
+
+extern size_t ext2_decompress_blocks (struct inode*, struct buffer_head**, int, size_t, u32 cluster);
+extern int ext2_count_blocks (struct inode*);
+extern int ext2_recognize_compressed (struct inode *, unsigned cluster);
+extern unsigned long ext2_adler32 (unsigned long, unsigned char*, int);
+
+extern size_t ext2_iLZV1 (int);
+extern size_t ext2_iLZV2 (int);
+extern size_t ext2_iNONE (int);
+extern size_t ext2_iGZIP (int);
+extern size_t ext2_iBZIP2 (int);
+extern size_t ext2_iLZO (int);
+extern size_t ext2_iLZRW3A (int);
+extern size_t ext2_iZLIB (int);
+
+extern size_t ext2_wLZV1 (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_wLZV2 (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_wNONE (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_wGZIP (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_wBZIP2 (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_wLZO (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_wLZRW3A (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_wZLIB (__u8*, __u8*, void*, size_t, size_t, int);
+
+extern size_t ext2_rLZV1 (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_rLZV2 (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_rNONE (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_rGZIP (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_rBZIP2 (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_rLZO (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_rLZRW3A (__u8*, __u8*, void*, size_t, size_t, int);
+extern size_t ext2_rZLIB (__u8*, __u8*, void*, size_t, size_t, int);
+
+struct ext2_algorithm {
+ char *name;
+ int avail;
+ size_t (*init) (int);
+ size_t (*compress) (__u8*, __u8*, void*, size_t, size_t, int);
+ size_t (*decompress) (__u8*, __u8*, void*, size_t, size_t, int);
+};
+
+struct ext2_method {
+ unsigned alg;
+ int xarg;
+};
+
+
+# define ext2_first_cluster_nblocks(_i) ((EXT2_I(_i))->i_clu_nblocks > 4 && (_i)->i_sb->s_blocksize < 4096 ? 12 : 4)
+# define ext2_block_to_cluster(_i,_b) ((_b) < ext2_first_cluster_nblocks(_i) ? 0 : (((_b) - ext2_first_cluster_nblocks(_i)) >> (EXT2_I(_i))->i_log2_clu_nblocks) + 1)
+# define ext2_offset_to_cluster(_i,_o) ext2_block_to_cluster((_i), ((_o) >> (_i)->i_sb->s_blocksize_bits))
+# define ext2_n_clusters(_i) ((_i)->i_size ? ext2_offset_to_cluster((_i), (_i)->i_size - 1) + 1 : 0)
+# define ext2_cluster_block0(_i,_c) ((_c) ? ext2_first_cluster_nblocks(_i) + (((_c) - 1) << (EXT2_I(_i))->i_log2_clu_nblocks) : 0)
+# define ext2_cluster_nblocks(_i,_c) ((_c) ? (EXT2_I(_i))->i_clu_nblocks : ext2_first_cluster_nblocks(_i))
+# define ext2_cluster_offset(_i,_c) ((_c) ? ext2_cluster_block0((_i), (_c)) << (_i)->i_sb->s_blocksize_bits : 0)
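+
+/* Worked example (illustrative): with 1 KiB blocks and
+   i_log2_clu_nblocks == 3 (i_clu_nblocks == 8), the first cluster
+   covers the 12 direct blocks, so cluster 1 starts at block 12,
+   cluster 2 at block 20, and ext2_block_to_cluster() maps block 20 to
+   ((20 - 12) >> 3) + 1 == 2.  With these sizes clusters never straddle
+   an address (indirect) block boundary, which compress.c relies on. */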
+
+# define ext2_first_cluster_npages(_i) ((EXT2_I(_i))->i_clu_nblocks > 4 && (_i)->i_sb->s_blocksize < 4096 ? 12 >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : 4 >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits))
+# define ext2_page_to_cluster(_i,_p) ((_p) < ext2_first_cluster_npages(_i) ? 0 : (((_p) - ext2_first_cluster_npages(_i)) >> (((EXT2_I(_i))->i_log2_clu_nblocks)+(_i)->i_sb->s_blocksize_bits-PAGE_CACHE_SHIFT)) + 1)
+# define ext2_cluster_page0(_i,_c) ((_c) ? ext2_cluster_block0(_i, _c) >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : 0)
+# define ext2_cluster_npages(_i,_c) ((_c) ? (EXT2_I(_i))->i_clu_nblocks >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : ext2_first_cluster_npages(_i))
+
+static inline int
+ext2_offset_is_clu_boundary(struct inode *inode, u32 off)
+{
+ if (off & (inode->i_sb->s_blocksize - 1))
+ return 0;
+ if (off == 0)
+ return 1;
+ off >>= inode->i_sb->s_blocksize_bits;
+ if (off < ext2_first_cluster_nblocks(inode))
+ return 0;
+ off -= ext2_first_cluster_nblocks(inode);
+ return !(off & (EXT2_I(inode)->i_clu_nblocks - 1));
+}
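+
+/* Example (illustrative), continuing the geometry above (1 KiB blocks,
+   8-block clusters, 12-block first cluster): offsets 0, 12 KiB, 20 KiB,
+   28 KiB, ... are cluster boundaries.  12 KiB passes because
+   (12 - 12) & 7 == 0; 16 KiB fails because (16 - 12) & 7 == 4. */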
+
+struct ext2_wa_contents_S {
+ ino_t ino;
+ dev_t dev;
+ unsigned cluster;
+};
+
+DECLARE_PER_CPU(struct ext2_wa_S *, ext2_rd_wa);
+DECLARE_PER_CPU(struct ext2_wa_S *, ext2_wr_wa);
+
+extern void ext2_alloc_rd_wa(void);
+extern void ext2_alloc_wr_wa(void);
+
+extern struct ext2_algorithm ext2_algorithm_table[];
+extern struct ext2_method ext2_method_table[]; /*mw: is static so far, no writes*/
+
+/* Both of these return -errno if error, 0 if not compressed, positive
+ if compressed. (You should use the macro unless you've already
+ tested COMPRBLK.) */
+extern int ext2_cluster_is_compressed_fn (struct inode *inode, __u32 cluster);
+static inline int ext2_cluster_is_compressed (struct inode *inode, __u32 cluster)
+{
+ if ((EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL) == 0)
+ return 0;
+ return ext2_cluster_is_compressed_fn (inode, cluster);
+}
+extern unsigned ext2_calc_free_ix (unsigned, u8 const *, unsigned);
+extern int ext2_unpack_blkaddrs(struct inode *, struct buffer_head **, int, unsigned, u8 const *, unsigned, unsigned, unsigned, unsigned);
+
+# define HOLE_BLKADDR(_b) \
+ (((_b) == 0) \
+ || ((_b) == EXT2_COMPRESSED_BLKADDR))
+# else /* !CONFIG_EXT2_COMPRESS */
+# define HOLE_BLKADDR(_b) ((_b) == 0)
+# endif
+
+/* For some reason or other, I see code like `if (le32_to_cpu(tmp) !=
+ 0)' around in the kernel. So far I haven't checked whether or not
+ the compiler knows that the swab can be dropped. */
+# if defined(EXT2_COMPRESSED_BLKADDR) && EXT2_COMPRESSED_BLKADDR != 0xffffffff
+/* This may be a false positive; the "correct" test would be `if
+ defined(CONFIG_EXT2_COMPRESS)', but if this test does succeed, then
+ there is at least cause to have a look around. */
+# error "Next bit of code is wrong."
+# endif
+
+# define HOLE_BLKADDR_SWAB32(_b) HOLE_BLKADDR(_b)
+
+#ifdef EXT2_COMPR_REPORT
+#define trace_e2c(format, args...) printk(KERN_DEBUG format, ## args)
+#else
+#define trace_e2c(format, args...) do {} while(0)
+#endif
+
+#endif /* __KERNEL__ */
+
+
+#endif /* EXT2_FS_C_H */
--- linux-3.2-rc5/fs/ext2/Makefile 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/Makefile 2011-12-13 14:22:47.830975498 +0100
@@ -2,10 +2,17 @@
# Makefile for the linux ext2-filesystem routines.
#
+ifeq ($(CONFIG_EXT2_COMPRESS),y)
+
+COMPRESS_STUFF := adler32.o compress.o e2zlib.o\
+ $($(obj-y):%/=%/ext2-compr-%.o)
+endif
+
obj-$(CONFIG_EXT2_FS) += ext2.o
ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
- ioctl.o namei.o super.o symlink.o
+ ioctl.o namei.o super.o symlink.o $(COMPRESS_STUFF)
+
ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
--- linux-3.2-rc5/fs/ext2/compress.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/compress.c 2011-12-13 14:22:47.839975781 +0100
@@ -0,0 +1,3420 @@
+/*
+ * linux/fs/ext2/compress.c
+ *
+ * Copyright (C) 1995 Antoine Dumesnil de Maricourt (dumesnil@etca.fr)
+ * (transparent compression code)
+ */
+
+/*
+ * Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch FRANCE
+ *
+ * Transparent compression code for 2.4 kernel.
+ *
+ * Denis Richard (denis.richard@sxb.bsf.alcatel.fr)
+ * Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr)
+ *
+ * Adapted from patch e2compr-0.4.39-patch-2.2.18 .
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/ext2_fs_c.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/buffer_head.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/quotaops.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/rmap.h>
+#include <linux/swap.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kernel_stat.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/vmstat.h>
+#include <linux/file.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/mm_inline.h>
+#include <linux/pagevec.h>
+#include <linux/backing-dev.h>
+#include <linux/rmap.h>
+#include <linux/topology.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/notifier.h>
+#include <linux/rwsem.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <asm/tlbflush.h>
+#include <asm/div64.h>
+#include <linux/swapops.h>
+#include <linux/percpu.h>
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+
+#ifdef CONFIG_HIGHMEM
+#define restore_b_data_himem(bh) do { assert(page_address((bh)->b_page)); (bh)->b_data = page_address((bh)->b_page) + bh_offset(bh); } while (0)
+
+
+
+int ext2_kmap_cluster_pages(struct page *page, struct page *pg[],
+ struct page *epg[])
+{
+ int i = 0;
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (!pg[i])
+ break;
+ if (epg && epg[i])
+ kmap(epg[i]);
+ else
+ kmap(pg[i]);
+ }
+
+ if (page)
+ kmap(page);
+ return 0;
+}
+
+
+int ext2_kunmap_cluster_pages(struct page *page, struct page *pg[],
+ struct page *epg[])
+{
+ int i = 0;
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (!pg[i])
+ break;
+ if (epg && epg[i])
+ kunmap(epg[i]);
+ else
+ kunmap(pg[i]);
+ }
+
+ if (page)
+ kunmap(page);
+ return 0;
+}
+#else //no high-mem:
+#define restore_b_data_himem(bh) do {} while (0)
+#endif
+
+
+/* Dummy functions for the `none' compression method. */
+size_t ext2_iNONE (int action) { return 0; }
+size_t ext2_wNONE (__u8 *ibuf, __u8 *obuf, void *wa, size_t ilen, size_t olen, int xarg) { return 0; }
+size_t ext2_rNONE (__u8 *ibuf, __u8 *obuf, void *wa, size_t ilen, size_t olen, int xarg) { return 0; }
+
+/*
+ * Algorithm and method tables
+ */
+struct ext2_algorithm ext2_algorithm_table[] = {
+ /* Note: all algorithms must have the `name' field filled in.
+ This is used to autoload algorithm modules (ext2-compr-%s), and
+ in kernel printk. */
+ /* N.B. Do not renumber these algorithms! (To do so is to change
+ the binary format.) It's OK for `none' and `undef' to be
+ renumbered, though. */
+
+ /* Fields:
+ name; available; routines for:
+ init, compress, decompress. */
+ {"lzv1", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE},
+ {"lzrw3a", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE},
+ {"gzip", 1, ext2_iZLIB, ext2_wZLIB, ext2_rZLIB}, //Andreas: workaround
+ {"bzip2", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE},
+ {"lzo", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE},
+ {"none", 1, ext2_iNONE, ext2_wNONE, ext2_rNONE},
+
+ /* This "algorithm" is for unused entries in the method table.
+ It differs from EXT2_NONE_ALG in that it is considered
+ unavailable, whereas `none' is always available. */
+ {"undef", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE},
+
+};
+
+/* Note: EXT2_N_ALGORITHMS can't be increased beyond 16 without
+ changing the width of the s_algorithms_used field in the in-memory
+ superblock. The on-disk s_algorithms_used field is 32 bits long.
+ (This is in a state of flux. Currently (1998-02-05) there is no
+   distinction: we always use the s_es copy.) */
+
+/* The size of this table must be 32 to prevent Oopsen from
+ invalid data. We index this from 5 bits of i_flags, so
+ the size is (1 << 5) == 32. */
+struct ext2_method ext2_method_table[32] = {
+ /* Fields: algorithm id, algorithm argument. */
+ {EXT2_LZV1_ALG, 0},
+ {EXT2_NONE_ALG, 0}, /* 1: auto */
+ {EXT2_NONE_ALG, 0}, /* 2: defer */
+ {EXT2_NONE_ALG, 0}, /* 3: never */
+ {EXT2_BZIP2_ALG, 0}, /* 4: bzip2 */
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_LZRW3A_ALG, 0}, /* 8: lzrw3a */
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_LZO_ALG, 0}, /* 10: lzo1x_1 */
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_GZIP_ALG, 1}, /* 16 */
+ {EXT2_GZIP_ALG, 2},
+ {EXT2_GZIP_ALG, 3},
+ {EXT2_GZIP_ALG, 4},
+ {EXT2_GZIP_ALG, 5},
+ {EXT2_GZIP_ALG, 6},
+ {EXT2_GZIP_ALG, 7},
+ {EXT2_GZIP_ALG, 8},
+ {EXT2_GZIP_ALG, 9},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0},
+ {EXT2_UNDEF_ALG, 0}
+};
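+
+/* Reading the table above (illustrative): the method id comes from
+   5 bits of i_flags, so e.g. method 10 selects the lzo algorithm, and
+   methods 16..24 all select the gzip algorithm with xarg giving the
+   zlib compression level 1..9 (method 21 corresponds to gzip -6). */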
+
+
+static void ext2_mark_algorithm_use(struct inode *inode, unsigned alg)
+{
+ struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb);
+
+ /* Hopefully, lock_super() isn't needed here, as we don't
+ block in the critical region. True? */
+ assert(alg < EXT2_N_ALGORITHMS);
+ if (sbi->s_es->s_feature_incompat
+ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)) {
+ sbi->s_es->s_algorithm_usage_bitmap |= cpu_to_le32(1 << alg);
+ } else {
+ struct ext2_super_block *es = sbi->s_es;
+
+ es->s_algorithm_usage_bitmap = cpu_to_le32(1 << alg);
+ es->s_feature_incompat
+ |= cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION);
+ if (es->s_rev_level < EXT2_DYNAMIC_REV) {
+ /* Raise the filesystem revision level to
+ EXT2_DYNAMIC_REV so that s_feature_incompat
+ is honoured (except in ancient kernels /
+ e2fsprogs). We must also initialize two
+ other dynamic-rev fields. The remaining
+ fields are assumed to be already correct
+ (e.g. still zeroed). */
+ es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV);
+ es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO);
+ es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE);
+ }
+ }
+ mark_buffer_dirty(sbi->s_sbh);
+}
+
+
+/* Displays an error message if algorithm ,alg` is not marked in use,
+ and then marks it in use. */
+static void ext2_ensure_algorithm_use(struct inode *inode, unsigned alg)
+{
+ assert(alg < EXT2_N_ALGORITHMS);
+
+ if (!(EXT2_SB(inode->i_sb)->s_es->s_algorithm_usage_bitmap
+ & cpu_to_le32(1 << alg))) {
+ ext2_msg(inode->i_sb, "algorithm usage bitmap algorithm %s not marked used in inode %lu",
+ ext2_algorithm_table[alg].name, inode->i_ino);
+ ext2_mark_algorithm_use(inode, alg);
+ }
+}
+
+
+/*mw: out of cache bug fix 5-16-07 */
+static void create_empty_buffers_e2c(struct page *page,
+ unsigned long blocksize,
+ unsigned long b_state,
+ struct inode *inode)
+{
+ struct buffer_head *bh, *head, *tail;
+
+ head = alloc_page_buffers(page, blocksize, 1);
+ bh = head;
+ do {
+ bh->b_state |= b_state;
+ tail = bh;
+ bh->b_bdev = NULL; //mw: make it like 2.4
+ bh->b_blocknr = 0; //mw: make it like 2.4
+ bh->b_end_io = NULL; //mw: make it like 2.4
+ bh = bh->b_this_page;
+ } while (bh);
+ tail->b_this_page = head;
+ spin_lock(&inode->i_mapping->private_lock);
+ if (PageUptodate(page) || PageDirty(page)) {
+ bh = head;
+ do {
+ if (PageDirty(page))
+ set_buffer_dirty(bh);
+ if (PageUptodate(page))
+ set_buffer_uptodate(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+ }
+ attach_page_buffers(page, head);
+ spin_unlock(&inode->i_mapping->private_lock);
+}
+
+int ext2_get_cluster_pages(struct inode *inode, u32 cluster,
+ struct page *pg[], struct page *page, int compr)
+{
+ int nbpg, npg, i;
+ u32 page0; /* = position within file (not position within fs). */
+ u32 idx = 0;
+ struct page *cached_page;
+ struct pagevec lru_pvec;
+
+ /*mw */
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++)
+ pg[i] = NULL;
+
+ cached_page = NULL;
+ pagevec_init(&lru_pvec, 0);
+
+ page0 = ext2_cluster_page0(inode, cluster);
+ nbpg = ext2_cluster_npages(inode, cluster);
+
+ if (compr && (((page0 + nbpg) << PAGE_CACHE_SHIFT) > inode->i_size))
+ nbpg = ((inode->i_size - 1) >> PAGE_CACHE_SHIFT) - page0 + 1;
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c("ext2_get_cluster_pages: page0=%d, nbpg=%d page=%ld\n",
+ page0, nbpg, ((page != NULL) ? page->index : 0));
+#endif
+ for (npg = 0; npg < nbpg; npg++) {
+ if ((page == NULL) || ((page0 + npg) != page->index)) {
+ //pg[npg] = __grab_cache_page(inode->i_mapping, page0+npg); /* &cached_page, &lru_pvec);*/
+ pg[npg] = grab_cache_page_write_begin(inode->i_mapping, page0+npg, 0);
+ if (!pg[npg])
+ goto error;
+ } else {
+ pg[npg] = page;
+ }
+ if (!page_has_buffers(pg[npg])) {
+ ClearPageUptodate(pg[npg]);
+ ClearPageDirty(pg[npg]);
+ create_empty_buffers_e2c(pg[npg], inode->i_sb->s_blocksize, 0, inode);
+ if (unlikely(!page_has_buffers(pg[npg])))
+ trace_e2c("ext2_get_cluster_pages: NOMEM!\n");
+ assert(!PageUptodate(pg[npg]));
+ assert(!PageDirty(pg[npg]));
+ }
+ }
+ //set remaining pages to NULL
+ for (idx = npg; idx < EXT2_MAX_CLUSTER_PAGES; idx++)
+ pg[idx] = NULL;
+
+ if (cached_page)
+ page_cache_release(cached_page);
+ pagevec_lru_add_file(&lru_pvec);
+ pagevec_free(&lru_pvec);
+ return (npg);
+ error:
+ if (cached_page)
+ page_cache_release(cached_page);
+ pagevec_lru_add_file(&lru_pvec);
+ pagevec_free(&lru_pvec);
+ while (--npg >= 0) {
+ if ((page == NULL) || ((page0 + npg) != page->index)) {
+ unlock_page(pg[npg]);
+ page_cache_release(pg[npg]);
+ }
+ pg[npg] = NULL;
+ }
+ trace_e2c("ext2_get_cluster_pages: error no page\n");
+ return (-ENOMEM);
+}
+
+
+int ext2_get_cluster_extra_pages(struct inode *inode, u32 cluster,
+ struct page *pg[], struct page *epg[])
+{
+ struct page *page;
+ int nbpg, npg, i;
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++)
+ epg[i] = NULL;
+
+ nbpg = ext2_cluster_npages(inode, cluster);
+ for (npg = 0; npg < nbpg; npg++) {
+ if (pg[npg] == NULL)
+ break;
+ if (PageUptodate(pg[npg])) {
+ //page = page_cache_alloc(inode->i_mapping);
+			//mw: page_cache_alloc() would use the gfp mask of the address space (gfp_t mapping_gfp_mask(struct address_space *mapping));
+			// we must not trigger shrink_dcache_memory(), which might call ext2_cleanup_compressed_inode() with the SAME mutex held.
+ page = __page_cache_alloc(GFP_NOFS);
+
+ if (!page) {
+ goto error;
+ }
+ ClearPageError(page);
+ ClearPageReferenced(page);
+ ClearPageUptodate(page);
+ ClearPageDirty(page);
+ lock_page(page);
+ page->index = pg[npg]->index;
+
+ if (!page_has_buffers(page)) {
+ create_empty_buffers_e2c(page, inode->i_sb->s_blocksize, 0,
+ inode);
+				/*mw: only the "extra pages" for decompression need create_empty_buffers_e2c(), because
+				 * they have no mapping context and must not have one.  Otherwise they would need a
+				 * page->index, which always belongs to an address_space object (e.g. an inode), and
+				 * that is not intended here.  We only need their buffers for the short time of
+				 * decompression. */
+				if (unlikely(!page_has_buffers(page))) {
+					printk(KERN_ERR "ext2_get_cluster_extra_pages: out of memory\n");
+					unlock_page(page);
+					page_cache_release(page);
+					goto error;
+				}
+ }
+
+ epg[npg] = page;
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c
+ ("ext2_get_cluster_extra_pages: allocated page idx=%ld\n",
+ pg[npg]->index);
+#endif
+ } else {
+ epg[npg] = NULL;
+ }
+ }
+ return (npg);
+ error:
+ while (--npg >= 0)
+ if (epg[npg]) {
+ ClearPageDirty(epg[npg]);
+ ClearPageUptodate(epg[npg]);
+ try_to_free_buffers(epg[npg]);
+ unlock_page(epg[npg]);
+ assert(page_count(epg[npg]) == 1);
+ page_cache_release(epg[npg]);
+ }
+ trace_e2c("ext2_get_cluster_extra_pages: error no page\n");
+ return (-ENOMEM);
+
+}
+
+/* Read every block in the cluster. The blocks are stored in the bh
+ array, which must be big enough.
+
+   Return the number of blocks contained in the cluster, or -errno if an
+   error occurred.  The buffers should be released by the caller
+ (unless an error occurred).
+
+ The inode must be locked, otherwise it is possible that we return
+ some out of date blocks.
+
+ Called by :
+
+ ext2_decompress_cluster() [i_sem]
+ ext2_compress_cluster() [i_sem]
+ ext2_readpage() [i_sem] */
+
+
+int ext2_get_cluster_blocks(struct inode *inode, u32 cluster,
+ struct buffer_head *bh[], struct page *pg[],
+ struct page *epg[], int compr)
+{
+ struct buffer_head *br[EXT2_MAX_CLUSTER_BLOCKS];
+ int nreq, nbh = 0, npg, i;
+ u32 clu_nblocks;
+ int err;
+ const int blocks = PAGE_CACHE_SIZE >> inode->i_sb->s_blocksize_bits;
+
+ /*mw */
+ for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++)
+ bh[i] = NULL;
+
+ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */
+
+ /*
+ * Request full cluster.
+ */
+ {
+ u32 endblk;
+ u32 block; /* = position within file (not position within fs). */
+ u32 nbpg;
+ u32 page0; /* = position within file (not position within fs). */
+ u32 idx;
+
+ block = ext2_cluster_block0(inode, cluster);
+ clu_nblocks = ext2_cluster_nblocks(inode, cluster);
+ /* impl: Don't shorten endblk for i_size. The
+ remaining blocks should be NULL anyway, except in
+ the case when called from ext2_decompress_cluster
+ from ext2_truncate, in which case i_size is short
+ and we _want_ to get all of the blocks. */
+ endblk = block + clu_nblocks;
+
+ page0 = ext2_cluster_page0(inode, cluster);
+ nbpg = ext2_cluster_npages(inode, cluster);
+
+ if (compr
+ && (((page0 + nbpg) << PAGE_CACHE_SHIFT) > inode->i_size)) {
+ nbpg = ((inode->i_size - 1) >> PAGE_CACHE_SHIFT) - page0 + 1;
+ endblk =
+ block +
+ (nbpg <<
+ (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits));
+ }
+
+ idx = page0 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c("ext2_get_cluster_blocks: page0=%d, nbpg=%d\n", page0,
+ nbpg);
+#endif
+ for (npg = 0; npg < nbpg; npg++) {
+ struct buffer_head *buffer;
+
+ if ((epg != NULL) && (epg[npg] != NULL))
+ buffer = page_buffers(epg[npg]);
+ else
+ buffer = page_buffers(pg[npg]);
+ for (i = 0; i < blocks && (block + nbh) < endblk;
+ buffer = buffer->b_this_page, i++) {
+ if (idx == (block + nbh)) {
+ bh[nbh] = buffer;
+ nbh++;
+ }
+ idx++;
+ }
+ }
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c
+ ("ext2_get_cluster_blocks: get every pages and %d buffers\n",
+ nbh);
+#endif
+
+ for (nbh = 0, nreq = 0; block < endblk; nbh++) {
+ assert(bh[nbh] != NULL);
+ bh[nbh]->b_blocknr = 0;
+ clear_bit(BH_Mapped, &bh[nbh]->b_state);
+
+ //mw: does not work with 2.6 and holes!!!
+ //err=ext2_get_block(inode, block++, bh[nbh], (PageDirty(bh[nbh]->b_page) ? 1 : 0));
+ err = ext2_get_block(inode, block++, bh[nbh], 0);
+			/* mw: 0: we don't create non-existing blocks here;
+			 * we do that just before writeback, when we know which blocks we really need... */
+ //err=ext2_get_block(inode, block++, bh[nbh], (buffer_dirty(bh[nbh]) ? 1 : 0));
+
+			/* mw: bdev bug fix: for files which were compressed and now occupy fewer blocks,
+			 * ext2_get_block() returns 0 for an empty block.  As these buffers were used before,
+			 * bh[nbh]->b_bdev might be != NULL or simply invalid, so we set it explicitly
+			 * to NULL. */
+
+			//if the buffer is not mapped, its blocknr will be wrong;
+			//if we left a bdev set here, we would write to some "random" block
+ if (!buffer_mapped(bh[nbh])) {
+ bh[nbh]->b_bdev = NULL; /* don't write wrongly mapped blocks !!! */
+				/* mw: if you encounter a null-pointer oops here, you MUST
+				 * map your buffer using ext2_get_block() */
+ }
+
+ if (bh[nbh]->b_blocknr != 0) {
+ if (!buffer_uptodate(bh[nbh])
+ /* TODO: Do we need this
+ `!buffer_locked' test? */
+ && !buffer_locked(bh[nbh])
+ && !PageDirty(bh[nbh]->b_page))
+ br[nreq++] = bh[nbh];
+ } else if ((err != 0)
+ && (err != -EFBIG))
+ /* impl: for some unknown reason,
+ ext2_getblk() returns -EFBIG if
+ !create and there's a hole. ==> not right any more in 2.4 */
+ goto error;
+ }
+ for (i = nbh; i < EXT2_MAX_CLUSTER_BLOCKS; i++) {
+ bh[i] = NULL;
+ }
+ }
+#ifdef EXT2_COMPR_REPORT_CPR
+ trace_e2c("ext2_get_cluster_blocks: nreq=%d for cluster=%d\n", nreq,
+ cluster);
+#endif
+
+ //read all blocks, which are not null-blocks
+ if (nreq > 0)
+ ll_rw_block(READ, nreq, br);
+
+ /*
+ * Adjust nbh if we have some null blocks at end of cluster.
+ */
+ while ((nbh != 0) && (bh[nbh - 1]->b_blocknr == 0))
+ nbh--;
+
+ /*
+ * Wait for blocks.
+ */
+ err = -EIO;
+ CHECK_NOT_ATOMIC
+ for (i = 0; i < nbh; i++)
+ if ((!PageDirty(bh[i]->b_page)) && (bh[i]->b_blocknr != 0)) {
+ wait_on_buffer(bh[i]);
+ if (!buffer_uptodate(bh[i])) { /* Read error ??? */
+ trace_e2c
+ ("ext2_get_cluster_blocks: wait_on_buffer error (blocknr=%ld)\n",
+ bh[i]->b_blocknr);
+ goto error;
+ }
+ }
+ assert(nbh <= EXT2_MAX_CLU_NBLOCKS);
+
+ return nbh;
+
+ error:
+ printk("ERROR: ext2_get_cluster_blocks()\n");
+ return err;
+}
+
+
+/* Iterations over block in the inode are done with a generic
+ iteration key mechanism. We need one method to convert a block
+ number into a new key, one method to iterate (i.e., increment the
+ key) and one method to free the key. The code could be shared with
+ truncate.c, as this mechanism is very general.
+
+   This code assumes that nobody else can read or write the file
+ between ext2_get_key() and ext2_free_key(), so callers need to have
+ i_sem (which they all do anyway). */
+
+/* TODO: Get all of the bkey routines to return -errno instead of
+ true/false. */
+/* TODO: The bkey routines currently assume that address blocks are
+   allocated even if all contained addresses are NULL, but this is not
+   true.  Make sure that we differentiate between NULL block and error,
+ and then fix up ext2_set_key_blkaddr() and anything else (including
+ the pack/unpack routines). */
+struct ext2_bkey {
+ int level;
+ u32 block;
+ struct inode *inode;
+ int off[4];
+ u32 *ptr[4];
+ struct buffer_head *ibh[4];
+};
+
+
+/*
+ * Method to convert a block number into a key.
+ *
+ * Returns 1 on success, 0 on failure. You may safely, but need
+ * not, free the key even if ext2_get_key() fails.
+ */
+static int ext2_get_key(struct ext2_bkey *key, struct inode *inode,
+ u32 block)
+{
+ int x, level;
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+
+ assert(atomic_read(&inode->i_mutex.count) <= 0);
+
+ /*
+ * The first step can be viewed as translating the
+ * original block number in a special base (powers
+ * of addr_per_block).
+ */
+
+ key->block = block;
+
+ key->off[0] = key->off[1] = key->off[2] = key->off[3] = 0;
+ key->ibh[0] = key->ibh[1] = key->ibh[2] = key->ibh[3] = NULL;
+ key->ptr[0] = key->ptr[1] = key->ptr[2] = key->ptr[3] = NULL;
+
+ if (block >= EXT2_NDIR_BLOCKS) {
+ block -= EXT2_NDIR_BLOCKS;
+
+ if (block >= addr_per_block) {
+ block -= addr_per_block;
+
+ if (block >= addr_per_block * addr_per_block) {
+ block -= addr_per_block * addr_per_block;
+
+ key->off[0] = EXT2_TIND_BLOCK;
+ key->off[1] = (block / (addr_per_block * addr_per_block));
+ key->off[2] =
+ (block % (addr_per_block * addr_per_block)) /
+ addr_per_block;
+ key->off[3] = (block % addr_per_block);
+ level = 3;
+ } else {
+ key->off[0] = EXT2_DIND_BLOCK;
+ key->off[1] = block / addr_per_block;
+ key->off[2] = block % addr_per_block;
+ level = 2;
+ }
+ } else {
+ key->off[0] = EXT2_IND_BLOCK;
+ key->off[1] = block;
+ level = 1;
+ }
+ } else {
+ key->off[0] = block;
+ level = 0;
+ }
+
+ /*
+ * In the second step, we load the needed buffers.
+ */
+
+ key->level = level;
+ key->inode = inode;
+
+ key->ptr[0] = (u32 *) (&(EXT2_I(inode)->i_data));
+
+ for (x = 1; x <= level; x++) {
+ u32 *ptr;
+
+ ptr = key->ptr[x - 1];
+ if (ptr == NULL)
+ break;
+/* Paul Whittaker tweak 19 Feb 2005 */
+ block = le32_to_cpu(ptr[key->off[x - 1]]);
+ if (block == 0)
+ continue; // TLL 05/01/07
+ if ((key->ibh[x] = __bread(inode->i_sb->s_bdev,
+ block, inode->i_sb->s_blocksize))
+ == NULL)
+ goto error;
+ key->ptr[x] = (u32 *) (key->ibh[x]->b_data);
+ }
+
+ return 1;
+ error:
+ for (; x != 0; x--)
+ if (key->ibh[x] != NULL)
+ brelse(key->ibh[x]);
+ return 0;
+}
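+
+/* Worked example (illustrative): with 1 KiB blocks,
+   EXT2_ADDR_PER_BLOCK == 256.  For file block 300: 300 - 12 == 288,
+   then 288 - 256 == 32, and 32 < 256 * 256, so the key takes the
+   doubly-indirect path: level == 2 and
+   off[] == { EXT2_DIND_BLOCK, 32 / 256, 32 % 256 }
+          == { EXT2_DIND_BLOCK, 0, 32 }. */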
+
+
+/*
+ * Find the block for a given key. Return 0 if there
+ * is no block for this key.
+ */
+static inline u32 ext2_get_key_blkaddr(struct ext2_bkey *key)
+{
+ assert(key->inode);
+ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0);
+
+/* Paul Whittaker tweak 19 Feb 2005 */
+ if (key->ptr[key->level] == NULL)
+ return 0;
+ return le32_to_cpu(key->ptr[key->level][key->off[key->level]]);
+}
+
+
+/*
+ * Change the block for a given key. Return 0 on success,
+ * -errno on failure.
+ */
+static inline int ext2_set_key_blkaddr(struct ext2_bkey *key, u32 blkaddr)
+{
+ char bdn[BDEVNAME_SIZE];
+ assert(key->inode);
+ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0);
+
+ if (key->ptr[key->level] == NULL) {
+ /* The reason that this "can't happen" is that this
+ routine is only used to shuffle block numbers or by
+ free_cluster_blocks. Cluster sizes are such that
+ clusters can't straddle address blocks. So the
+ indirect block address can't be zero. AFAIK, ptr
+ can only be NULL on error or on null indirect block
+ address. Hmm, come to think of it, I think there
+ are still some callers that don't check for errors
+ from ext2_get_key(), so this still can happen until
+ those are fixed up. */
+ printk(KERN_ERR
+ "ext2_set_key_blkaddr: can't happen: NULL parent. "
+ "dev=%s, ino=%lu, level=%u.\n",
+ bdevname(key->inode->i_sb->s_bdev, bdn),
+ key->inode->i_ino, key->level);
+ return -ENOSYS;
+ }
+ /* Paul Whittaker tweak 19 Feb 2005 */
+	key->ptr[key->level][key->off[key->level]] = cpu_to_le32(blkaddr);
+ if (key->level > 0)
+ mark_buffer_dirty(key->ibh[key->level]);
+ return 0;
+}
+
+
+/*
+ * Increment the key. Returns 0 if we go beyond the limits,
+ * 1 otherwise.
+ *
+ * Precondition: -key->off[level] <= incr < addr_per_block.
+ */
+static int ext2_next_key(struct ext2_bkey *key, int incr)
+{
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(key->inode->i_sb);
+ int x, level = key->level;
+ u32 tmp;
+
+ assert(key->inode);
+ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0);
+
+
+ /*
+	 * Increment the key. This is done in two steps: first
+ * adjust the off array, then reload buffers that should
+ * be reloaded (we assume level > 0).
+ */
+
+ assert(key->off[level] >= -incr);
+ assert(incr < addr_per_block);
+ key->block += incr;
+ key->off[level] += incr;
+
+ /*
+	 * First step: should be thought of as the propagation
+ * of a carry.
+ */
+
+ if (level == 0) {
+ if (key->off[0] >= EXT2_NDIR_BLOCKS) {
+ key->off[1] = key->off[0] - EXT2_NDIR_BLOCKS;
+ key->off[0] = EXT2_IND_BLOCK;
+ level = 1;
+ }
+ x = 0;
+ } else {
+ for (x = level; x > 0; x--) {
+ if (key->off[x] >= addr_per_block) {
+ key->off[x] -= addr_per_block;
+ key->off[x - 1]++;
+
+ if (x == 1) {
+ if (++level < 4) {
+ key->off[level] = key->off[level - 1];
+ key->off[level - 1] = 0;
+ } else
+ return 0;
+ }
+ } else
+ break;
+ }
+ }
+
+ /*
+ * Second step: reload the buffers that have changed.
+ */
+
+ key->level = level;
+
+ CHECK_NOT_ATOMIC
+ while (x++ < level) {
+ if (key->ibh[x] != NULL) {
+ if (IS_SYNC(key->inode) && buffer_dirty(key->ibh[x])) {
+ //mw:
+ assert(buffer_mapped(key->ibh[x])
+ && (key->ibh[x]->b_bdev != NULL));
+ ll_rw_block(WRITE, 1, &(key->ibh[x]));
+ wait_on_buffer(key->ibh[x]);
+ }
+ brelse(key->ibh[x]);
+ }
+/* Paul Whittaker tweak 19 Feb 2005 */
+ if ((key->ptr[x - 1] != NULL)
+ && ((tmp = le32_to_cpu(key->ptr[x - 1][key->off[x - 1]])) !=
+ 0)) {
+ if ((key->ibh[x] =
+ __bread(key->inode->i_sb->s_bdev, tmp,
+ key->inode->i_sb->s_blocksize))
+ != NULL)
+ key->ptr[x] = (u32 *) (key->ibh[x]->b_data);
+ else
+ key->ptr[x] = NULL;
+ } else {
+ key->ibh[x] = NULL;
+ key->ptr[x] = NULL;
+ }
+ }
+
+ return 1;
+}
+
+
+/* Method to free the key: just release buffers.
+
+ Returns 0 on success, -errno on error.
+*/
+
+static int ext2_free_key(struct ext2_bkey *key)
+{
+ int x, n;
+ struct buffer_head *bh[4];
+
+ assert(key->inode);
+ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0);
+
+
+ for (x = 0, n = 0; x <= key->level; x++) {
+ if (key->ibh[x] != NULL) {
+ if (IS_SYNC(key->inode) && buffer_dirty(key->ibh[x]))
+ bh[n++] = key->ibh[x];
+ else
+ brelse(key->ibh[x]);
+ }
+ }
+
+ if (n > 0) {
+ int ncopy = n;
+ while (ncopy-- > 0) {
+ assert(buffer_mapped(bh[ncopy])
+ && (bh[ncopy]->b_bdev != NULL));
+ }
+
+ ll_rw_block(WRITE, n, bh);
+
+ CHECK_NOT_ATOMIC
+
+ while (n-- > 0) {
+ wait_on_buffer(bh[n]);
+ /* TODO: Check for error. */
+ brelse(bh[n]);
+ }
+ }
+ return 0;
+}
+
+
+/* Returns positive if specified cluster is compressed,
+ zero if not,
+ -errno if an error occurred.
+
+ If you need the result to be accurate, then down i_sem before
+ calling this, and don't raise i_sem until after you've used the
+ result. */
+int ext2_cluster_is_compressed_fn(struct inode *inode, unsigned cluster)
+{
+ unsigned block = (ext2_cluster_block0(inode, cluster)
+ + ext2_cluster_nblocks(inode, cluster)
+ - 1);
+ struct ext2_bkey key;
+ int result;
+
+ assert(atomic_read(&inode->i_mutex.count) <= 0);
+
+ /* impl: Not all callers of ext2_cluster_is_compressed_fn() have
+ i_sem down. Of course it is impossible to guarantee
+ up-to-date information for such callers (someone may
+ compress or decompress between when we check and when they
+ use the information), so hopefully it won't matter if the
+ information we return is slightly inaccurate (e.g. because
+ someone is de/compressing the cluster while we check). */
+ if (!ext2_get_key(&key, inode, block))
+ return -EIO;
+
+ result = (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR);
+ ext2_free_key(&key);
+ return result;
+}
+
+
+/* Support for the GETCOMPRRATIO ioctl() call. We calculate how many
+ blocks the file would hold if it weren't compressed. This requires
+ reading the cluster head for every compressed cluster.
+
+ Returns either -EAGAIN or the number of blocks that the file would
+ take up if uncompressed. */
+int ext2_count_blocks(struct inode *inode)
+{
+ struct buffer_head *head_bh;
+ int count;
+ int cluster;
+ struct ext2_bkey key;
+ u32 end_blknr;
+
+ if (!(EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL))
+ return inode->i_blocks;
+
+ mutex_lock(&inode->i_mutex);
+ end_blknr = ROUNDUP_RSHIFT(inode->i_size,
+ inode->i_sb->s_blocksize_bits);
+
+ /* inode->i_blocks is stored in units of 512-byte blocks. It's
+ more convenient for us to work in units of s_blocksize. */
+ {
+ u32 shift = inode->i_sb->s_blocksize_bits - 9;
+
+ count = inode->i_blocks;
+ if (count & ((1 << shift) - 1))
+ ext2_msg(inode->i_sb,
+ "ext2_count_blocks",
+ "i_blocks not multiple of blocksize");
+ count >>= shift;
+ }
+
+ cluster = 0;
+ if (!ext2_get_key(&key, inode, 0)) {
+ count = -EIO;
+ goto out;
+ }
+ while (key.block < end_blknr) {
+ u32 head_blkaddr = ext2_get_key_blkaddr(&key);
+
+ /* bug fix: init head_bh for each iteration TLL 2/21/07 */
+ head_bh = NULL;
+ if (head_blkaddr == EXT2_COMPRESSED_BLKADDR) {
+ count = -EXT2_ECOMPR;
+ break;
+ }
+ if (!ext2_next_key(&key, ext2_cluster_nblocks(inode, cluster) - 1))
+ break;
+ if (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR) {
+ struct ext2_cluster_head *head;
+
+ if (head_blkaddr == 0) {
+ count = -EXT2_ECOMPR;
+ break;
+ }
+ head_bh = __getblk(inode->i_sb->s_bdev,
+ head_blkaddr, inode->i_sb->s_blocksize);
+ if (head_bh == NULL) {
+ /* Hmm, EAGAIN or EIO? */
+ count = -EAGAIN;
+ break;
+ }
+ if (!buffer_uptodate(head_bh))
+ ll_rw_block(READ, 1, &head_bh);
+
+ CHECK_NOT_ATOMIC
+
+ wait_on_buffer(head_bh);
+
+#ifdef CONFIG_HIGHMEM
+ if (!page_address(head_bh->b_page)) {
+ BUG();
+ }
+#endif
+
+ head = (struct ext2_cluster_head *) head_bh->b_data;
+ /* remove clen > ulen test TLL 2/21/07 */
+ if ((head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X))
+ || (le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES)
+ || (head->holemap_nbytes > 4)) {
+ count = -EXT2_ECOMPR;
+ break;
+ }
+ assert(sizeof(struct ext2_cluster_head) == 16);
+ count += (ROUNDUP_RSHIFT(le32_to_cpu(head->ulen),
+ inode->i_sb->s_blocksize_bits)
+ - ROUNDUP_RSHIFT((le32_to_cpu(head->clen)
+ + sizeof(struct ext2_cluster_head)
+ + head->holemap_nbytes),
+ inode->i_sb->s_blocksize_bits));
+ brelse(head_bh);
+ head_bh = NULL;
+ }
+
+ if (!ext2_next_key(&key, 1))
+ break;
+ cluster++;
+ }
+ ext2_free_key(&key);
+ if (head_bh != NULL)
+ brelse(head_bh);
+ out:
+ mutex_unlock(&inode->i_mutex);
+ if (count == -EXT2_ECOMPR) {
+ ext2_msg(inode->i_sb,
+ "ext2_count_blocks",
+ "invalid compressed cluster %u of inode %lu",
+ cluster, inode->i_ino);
+ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL;
+ }
+
+ /* The count should be in units of 512 (i.e. 1 << 9) bytes. */
+ if (count >= 0)
+ count <<= inode->i_sb->s_blocksize_bits - 9;
+ return count;
+}
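+
+/* Example of the arithmetic above (illustrative): for a compressed
+   cluster with ulen == 32768 and clen + head + holemap == 9000 on a
+   1 KiB-block filesystem, the cluster would occupy
+   ROUNDUP_RSHIFT(32768, 10) == 32 blocks uncompressed but only
+   ROUNDUP_RSHIFT(9000, 10) == 9 blocks compressed, so 23 blocks are
+   added to the uncompressed total. */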
+
+
+/* Decompress some blocks previously obtained from a cluster.
+ Decompressed data is stored in ext2_rd_wa.u. Buffer heads in the bh
+   array are packed together at the beginning of the array. The ulen
+ argument is an indication of how many bytes the caller wants to
+ obtain, excluding holes. (This can be less than head->ulen, as in the
+ case of readpage.) No hole processing is done; we don't even look at
+ head->holemap.
+
+   Note the semantic difference between this and
+   ext2_decompress_cluster(): the latter decompresses a cluster _and
+ stores it as such_, whereas ext2_decompress_blocks() just
+ decompresses the contents of the blocks into ext2_rd_wa.u.
+
+ The working area is supposed to be available and locked.
+
+ Returns a negative value on failure, the number of bytes
+ decompressed otherwise.
+
+ Called by :
+
+ ext2_decompress_cluster () [sem down]
+ ext2_readpage () [sem down, but only ifndef EXT2_LOCK_BUFFERS] */
+
+/* TODO: ext2_decompress_blocks() scribbles in ext2_rd_wa.c.
+ Check callers to make sure this isn't a problem. */
+
+/* mw: caller must already have done: "get_cpu_var(ext2_rd_wa)" */
+size_t
+ext2_decompress_blocks(struct inode * inode,
+ struct buffer_head ** bh,
+ int nblk, size_t ulen, u32 cluster)
+{
+ struct ext2_cluster_head *head;
+ int count, src_ix, x;
+ unsigned char *dst;
+ unsigned meth, alg;
+ char bdn[BDEVNAME_SIZE];
+
+#ifdef EXT2_COMPR_DEBUG
+ assert(in_atomic());
+ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */
+#endif
+
+ /*
+	   We pack the buffers together beforehand (and must take care
+	   not to duplicate the buffer heads in the array).
+
+ pjm 1998-01-09: Starting from e2compr-0.4.0, they should
+ already be packed together in the blkaddr array. TODO:
+ Insert appropriate assert() statements checking tht this is
+	   Insert appropriate assert() statements checking that this is
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c("ext2_decompress_blocks: nblk=%d\n", nblk);
+#endif
+ for (src_ix = 0, x = 0; src_ix < nblk; src_ix++) {
+ if (bh[src_ix] == NULL)
+ printk("no_bheader()\n");
+ if ((bh[src_ix] != NULL) && (bh[src_ix]->b_blocknr != 0)) {
+
+ if (x < src_ix) {
+ ext2_msg(inode->i_sb, "bad buffer table",
+ "inode = %lu", inode->i_ino);
+ goto error;
+ }
+ x++;
+ }
+ }
+
+ nblk = x;
+#ifdef EXT2_COMPR_REPORT_CPR
+ trace_e2c("ext2_decompress_blocks (2): nblk=%d\n", nblk);
+#endif
+ if (nblk == 0) {
+ ext2_msg(inode->i_sb, "no block in cluster", "inode = %lu",
+ inode->i_ino);
+ goto error;
+ }
+
+ restore_b_data_himem(bh[0]);
+ head = (struct ext2_cluster_head *) (bh[0]->b_data);
+
+ /*
+ * Do some consistency checks.
+ */
+
+ if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) {
+ ext2_msg(inode->i_sb,
+ "bad magic number",
+ "inode = %lu, magic = %#04x",
+ inode->i_ino, le16_to_cpu(head->magic));
+ goto error;
+ }
+#if EXT2_GRAIN_SIZE & (EXT2_GRAIN_SIZE - 1)
+# error "This code assumes EXT2_GRAIN_SIZE to be a power of two."
+#endif
+ /* The macro also assumes that _a > 0, _b > 0. */
+#define ROUNDUP_GE(_a, _b, _d) ( ( ((_a) - 1) \
+ | ((_d) - 1)) \
+ >= ( ((_b) - 1) \
+ | ((_d) - 1)))
+
+ //mw: following 3 just for debugging!!!
+ assert(!((le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES)));
+ assert(!((head->clen == 0)));
+ assert(!(ROUNDUP_GE(le32_to_cpu(head->clen)
+ + head->holemap_nbytes + sizeof(struct ext2_cluster_head),
+ le32_to_cpu(head->ulen), EXT2_GRAIN_SIZE)));
+
+ if ((le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES)
+ || (head->clen == 0)
+ || ROUNDUP_GE(le32_to_cpu(head->clen)
+ + head->holemap_nbytes
+ + sizeof(struct ext2_cluster_head),
+ le32_to_cpu(head->ulen), EXT2_GRAIN_SIZE)) {
+ ext2_msg(inode->i_sb,
+ "invalid cluster len",
+ "inode = %lu, len = %u:%u",
+ inode->i_ino,
+ le32_to_cpu(head->clen), le32_to_cpu(head->ulen));
+ goto error;
+ }
+#undef ROUNDUP_GE
+
+ /* TODO: Test for `nblk != 1 + ...' instead of the current
+ one-sided test. However, first look at callers, and make
+ sure that they handle the situation properly (e.g. freeing
+	   unneeded blocks) and that they always pass a correct
+ value for nblk. */
+ if (nblk <= ((le32_to_cpu(head->clen)
+ + head->holemap_nbytes + sizeof(struct ext2_cluster_head)
+ - 1)
+ / bh[0]->b_size)) {
+ int i;
+ ext2_msg(inode->i_sb,
+ "missing blocks",
+ "inode = %lu, blocks = %d/%u",
+ inode->i_ino, nblk, ((le32_to_cpu(head->clen)
+ + head->holemap_nbytes
+ + sizeof(struct ext2_cluster_head)
+ - 1)
+ / bh[0]->b_size) + 1);
+ printk("i_size=%d\n", (int) inode->i_size);
+ for (i = 0; i < 12; i++)
+ printk("i_data[%d]=%d\n", i, EXT2_I(inode)->i_data[i]);
+		printk("cluster_head (sizeof head=%u):\n\tmagic=0x%4x\n\tmethod=%d\n\t"
+		       "holemap_nbytes=%d\n\tulen=%d\n\tclen=%d\n\tbh->b_size=%zu\n",
+ sizeof(struct ext2_cluster_head), head->magic,
+ (int) head->method, (int) head->holemap_nbytes, head->ulen,
+ head->clen, bh[0]->b_size);
+ goto error;
+ }
+
+ /* I moved it here in case we need to load a module that
+ * needs more heap that is currently allocated.
+ * In such case "init_module" for that algorithm forces
+ * re-allocation of ext2_wa. It should be safe here b/c the
+ * first reference to ext2_wa comes just after and we have
+ * locked ext2_wa before.
+ *
+ * FIXME: Totally separate working areas for reading and writing.
+ * Jan R.
+ */
+ meth = head->method; /* only a byte, so no swabbing needed. */
+ if (meth >= EXT2_N_METHODS) {
+ ext2_msg(inode->i_sb,
+			 "Ass.: illegal method id",
+ "inode = %lu, id = %u", inode->i_ino, meth);
+ dump_stack();
+ goto error;
+ }
+ alg = ext2_method_table[meth].alg;
+
+ /*
+ * Adjust the length if too many bytes are requested.
+ *
+	 * TODO: Take care of the bitmaps here, instead of by the
+	 *       caller as we currently do.  Keep a small cache that
+	 *       holds the number of the previous <inode, cluster> to
+	 *       have been decompressed.  The problem is that we have
+	 *       no way of knowing whether the blocks have been freed
+	 *       and reallocated in the meantime -> we must be informed
+	 *       so that we can invalidate the buffer.  (pjm's
+	 *       translation of the original French note.) */
+ if (ulen > le32_to_cpu(head->ulen)) {
+ memset(__get_cpu_var(ext2_rd_wa)->u + le32_to_cpu(head->ulen), 0, ulen - le32_to_cpu(head->ulen));
+ ulen = le32_to_cpu(head->ulen);
+
+ assert((bh[0]->b_size & (bh[nblk - 1]->b_size - 1)) == 0);
+ if (((le32_to_cpu(head->clen)
+ + head->holemap_nbytes + sizeof(struct ext2_cluster_head)
+ - 1)
+ | (bh[0]->b_size - 1))
+ >= ((ulen - 1) | (bh[0]->b_size - 1))) {
+ printk(KERN_WARNING
+ "ext2_decompress_blocks: "
+ "ulen (=%zu) or clen (=%u) wrong "
+ "in dev %s, inode %lu.\n",
+ ulen, le32_to_cpu(head->clen),
+ bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino);
+ goto error;
+ }
+ }
+
+ /*
+ * Now, decompress data.
+ */
+ /* TODO: Is this (ulen == 0) possible? */
+ if (ulen == 0)
+ return 0;
+
+ for (x = 0, dst = __get_cpu_var(ext2_rd_wa)->c; x < nblk; dst += bh[x++]->b_size) {
+ restore_b_data_himem(bh[x]);
+ memcpy(dst, bh[x]->b_data, bh[x]->b_size);
+ }
+
+
+ if (!ext2_algorithm_table[alg].avail) {
+ ext2_msg(inode->i_sb,
+ "ext2_decompress_blocks",
+ "algorithm `%s' not available for inode %lu",
+ ext2_algorithm_table[alg].name, inode->i_ino);
+ ext2_mark_algorithm_use(inode, alg);
+ goto error;
+ }
+
+
+#ifdef EXT2_COMPR_DEBUG
+ {
+ struct ext2_cluster_head *wa1head = (struct ext2_cluster_head *) __get_cpu_var(ext2_rd_wa)->c;
+ unsigned clen = le32_to_cpu(wa1head->clen);
+ if (wa1head->checksum !=
+ cpu_to_le32(ext2_adler32
+ (le32_to_cpu(*(u32 *) __get_cpu_var(ext2_rd_wa)->c),
+ __get_cpu_var(ext2_rd_wa)->c + 8,
+ (sizeof(struct ext2_cluster_head) - 8 +
+ head->holemap_nbytes + clen))))
+ {
+ head->checksum = cpu_to_le32(0);
+ ext2_msg(inode->i_sb, "ext2_decompress_blocks: corrupted compressed data ",
+ "in inode %lu", inode->i_ino);
+ //goto error;
+			//mw: we try to go on; if the data is corrupt we will get a decompression error anyway.
+ }
+ }
+#endif
+
+ count = ext2_algorithm_table[alg].decompress(__get_cpu_var(ext2_rd_wa)->c +
+ sizeof(struct
+ ext2_cluster_head) +
+ head->holemap_nbytes,
+ __get_cpu_var(ext2_rd_wa)->u,
+ __get_cpu_var(ext2_rd_wa)->heap,
+ le32_to_cpu(head->clen), ulen,
+ ext2_method_table[meth].xarg);
+
+ /* If we got fewer than ulen bytes, there is a problem, since
+ we corrected the ulen value before decompressing. Note
+ that it's OK for count to exceed ulen, because ulen can be
+ less than head->ulen. */
+ if ((count < ulen) || (count != le32_to_cpu(head->ulen))) {
+ ext2_msg(inode->i_sb,
+ "ext2_decompress_blocks: corrupted compressed data ", "inode = %lu, count = %u of %zu (%u/%u)",
+ inode->i_ino, count, ulen, le32_to_cpu(head->clen), le32_to_cpu(head->ulen));
+ goto error;
+ }
+ ext2_ensure_algorithm_use(inode, alg);
+ return count;
+
+ error:
+
+ /* Raise the ECOMPR flag for this file. What this means is
+ that the file cannot be written to, and can only be read if
+ the user raises the NOCOMPR flag.
+
+ pjm 1997-01-16: I've changed it so that files with ECOMPR
+ still have read permission, so user can still read the rest
+ of the file but get an I/O error (errno = EXT2_ECOMPR) when
+ they try to access anything from this cluster. */
+
+ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL;
+
+ inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+ /* pjm 1998-02-21: We used to do `memset(ext2_rd_wa.u, 0, ulen)'
+ here because once upon a time the user could sometimes see
+ buf contents. I believe that this can never happen any
+ more. */
+ return -EXT2_ECOMPR;
+}
+
+
+/* ext2_calc_free_ix: Calculates the position of the C_NBLK'th non-hole
+ block; equals C_NBLK plus the number of holes in the first CALC_FREE_IX()
+ block positions of the cluster.
+
+   pre: 1 <= c_nblk < EXT2_MAX_CLUSTER_BLOCKS,
+	Number of 1 bits in ,ubitmap` > ,c_nblk`.
+   post: c_nblk <= calc_free_ix() < EXT2_MAX_CLUSTER_BLOCKS
+
+ Called by:
+ ext2_decompress_cluster()
+ ext2_file_write()
+
+ TODO: Have ext2_compress_cluster() call this.
+ */
+unsigned ext2_calc_free_ix(unsigned holemap_nbytes, u8 const *holemap,
+ unsigned c_nblk)
+{
+ unsigned i;
+
+ assert(1 <= c_nblk);
+ assert(c_nblk < EXT2_MAX_CLUSTER_BLOCKS);
+ for (i = 0; (i < holemap_nbytes * 8) && (c_nblk > 0);) {
+ assert(i < EXT2_MAX_CLUSTER_BLOCKS - 1);
+ if ((holemap[i >> 3] & (1 << (i & 7))) == 0)
+ c_nblk--;
+ i++;
+ }
+ i += c_nblk;
+ assert(i < EXT2_MAX_CLUSTER_BLOCKS);
+ return i;
+}
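+
+/* Worked example (illustrative): holemap_nbytes == 1, holemap[0] == 0x05
+   (holes at block positions 0 and 2), c_nblk == 2.  Positions 1 and 3
+   are non-holes, so c_nblk reaches 0 at i == 4 and the function returns
+   free_ix == 4: the two data blocks plus the two holes together occupy
+   positions 0..3 of the cluster. */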
+
+
+/* ext2_unpack_blkaddrs(): Prepare the blkaddr[] array for
+ decompression by moving non-hole blocks to their proper positions
+ (according to ubitmap) and zeroing any other blocks.
+
+ Returns 0 on success, -errno on error.
+
+   Note: We assume that blkaddr[i] won't change under us for all
+   clu_block0 <= i < clu_block0 + clu_nblocks.  Holding i_sem should
+ guarantee this.
+
+ Called by:
+ ext2_decompress_cluster()
+ ext2_file_write() */
+int
+ext2_unpack_blkaddrs(struct inode *inode,
+ struct buffer_head *bh[],
+ int mmcp,
+ unsigned holemap_nbytes,
+ u8 const *holemap,
+ unsigned c_nblk,
+ unsigned free_ix,
+ unsigned clu_block0, unsigned clu_nblocks)
+{
+ struct ext2_bkey key;
+ u32 *blkaddr;
+ unsigned si, di;
+
+ assert(clu_nblocks <= EXT2_MAX_CLUSTER_BLOCKS);
+ assert(1 <= c_nblk);
+ assert(c_nblk <= free_ix);
+ assert(free_ix < EXT2_MAX_CLUSTER_BLOCKS);
+ if (!ext2_get_key(&key, inode, clu_block0))
+ return -EIO;
+
+ if (key.ptr[key.level] == NULL) {
+ /* TODO: Call ext2_error(). */
+ ext2_free_key(&key);
+ return -EIO;
+ }
+
+	/* impl: Note that we're relying on clusters not straddling
+ address block boundaries. */
+ blkaddr = &key.ptr[key.level][key.off[key.level]];
+ memset(blkaddr + free_ix,
+ 0, sizeof(*blkaddr) * (clu_nblocks - free_ix));
+ si = c_nblk;
+ for (di = free_ix; di > si;) {
+ --di;
+ if (((di >> 3) < holemap_nbytes)
+ && (holemap[di >> 3] & (1 << (di & 7)))) {
+ blkaddr[di] = 0;
+ bh[di]->b_blocknr = 0;
+ clear_bit(BH_Mapped, &bh[di]->b_state);
+ } else {
+ if (si == 0) {
+ break;
+ }
+ blkaddr[di] = blkaddr[--si];
+ assert(bh[di]->b_blocknr == 0);
+ assert(bh[si]->b_blocknr != 0);
+ assert(buffer_mapped(bh[si]));
+#ifdef EXT2_COMPR_REPORT_CPR
+ trace_e2c("unpack: di=%d sts=0x%x si=%d blk=%ld sts=0x%x\n",
+ di, (int) bh[di]->b_state, si, bh[si]->b_blocknr,
+ (int) bh[si]->b_state);
+#endif
+ bh[di]->b_blocknr = bh[si]->b_blocknr;
+ set_bit(BH_Mapped, &bh[di]->b_state);
+ bh[si]->b_blocknr = 0;
+ clear_bit(BH_Mapped, &bh[si]->b_state);
+ set_bit(BH_Uptodate, &bh[di]->b_state);
+ if (mmcp) {
+ restore_b_data_himem(bh[si]);
+ restore_b_data_himem(bh[di]);
+ memcpy(bh[di]->b_data, bh[si]->b_data,
+ inode->i_sb->s_blocksize);
+ }
+ }
+ }
+ if (key.level > 0)
+ mark_buffer_dirty(key.ibh[key.level]);
+ return ext2_free_key(&key);
+}
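+
+/* Continuing the worked example from ext2_calc_free_ix() (illustrative):
+   with blkaddr[] == { b0, b1, 0, 0, ... }, holemap[0] == 0x05,
+   c_nblk == 2 and free_ix == 4, the loop above rewrites the array to
+   { 0, b0, 0, b1, 0, ... }: the packed compressed blocks are moved to
+   the positions the holemap marks as non-holes, and hole positions are
+   zeroed. */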
+
+
+/*
+ * Decompress one cluster. If already compressed, the cluster
+ * is decompressed in place, and the compress bitmap is updated.
+ *
+ * Returns the size of decompressed data on success, a negative
+ * value in case of failure, or 0 if the cluster was not compressed.
+ *
+ * The inode is supposed to be writable.
+ *
+ * Called by :
+ *
+ * ext2_decompress_inode() [sem down]
+ * ext2_file_write() [sem down]
+ * trunc_bitmap() [sem down]
+ */
+int ext2_decompress_cluster(struct inode *inode, u32 cluster)
+{
+ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS];
+ struct buffer_head *bhc[EXT2_MAX_CLUSTER_BLOCKS];
+ struct page *pg[EXT2_MAX_CLUSTER_PAGES], *epg[EXT2_MAX_CLUSTER_PAGES];
+ int result, nbh;
+ unsigned npg, c_nblk;
+ struct ext2_cluster_head *head;
+ int i = 0;
+ unsigned free_ix, clu_block0, clu_nblocks;
+	int d_npg = -1;		/* number of decompressed pages */
+ unsigned long allpagesuptodate = 1;
+ struct buffer_head *bh_writeout[EXT2_MAX_CLUSTER_BLOCKS];
+ int bhn_writeout;
+#ifdef CONFIG_HIGHMEM
+ int kmapped = 0;
+#endif
+
+	for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++)
+		bh_writeout[i] = NULL;
+	bhn_writeout = 0;
+
+ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++)
+ epg[i] = NULL;
+
+ /*
+ Get blocks from cluster.
+ Assign to variables head, ubitmap, clu_block0, clu_nblocks.
+ Shuffle blkaddr[] array and write zero to holes.
+ Allocate new blocks.
+ Get the working area.
+ Decompress.
+ Copy to bh[]->b_data (marking buffers uptodate and dirty).
+ Release working area.
+ Release bh[].
+ */
+
+ nbh = 0;
+ npg = ext2_cluster_npages(inode, cluster);
+ result = ext2_get_cluster_pages(inode, cluster, pg, NULL, 0);
+ if (result <= 0) {
+ for (i = 0; i < npg; i++)
+ epg[i] = NULL;
+ goto out_err;
+ }
+
+ for (i = 0; i < npg; i++) {
+ if ((pg[i]->index <= ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) &&
+ !PageUptodate(pg[i])) {
+ allpagesuptodate = 0;
+ }
+ }
+ if (allpagesuptodate) {
+ //printk("DecompressPages: Ino:%lu\n", inode->i_ino);
+ result = ext2_decompress_pages(inode, cluster, pg);
+ if (result != 0) {
+ for (i = 0; i < npg; i++)
+ epg[i] = NULL;
+ if (result > 0)
+ goto cleanup;
+ else
+ goto out_err;
+ }
+		/*mw: if we get here (result == 0), ext2_decompress_pages()
+		 * found that not all pages were up to date after all
+		 */
+ }
+ //printk("DecompressCluster: Ino:%lu\n", inode->i_ino);
+ result = ext2_get_cluster_extra_pages(inode, cluster, pg, epg);
+ if (result <= 0) {
+ goto out_err;
+ }
+#ifdef CONFIG_HIGHMEM
+ ext2_kmap_cluster_pages(NULL, pg, epg);
+ kmapped = 1;
+#endif
+
+ result = ext2_get_cluster_blocks(inode, cluster, bh, pg, epg, 0);
+ if (result <= 0) {
+ goto out_err;
+ }
+ nbh = c_nblk = result;
+
+
+#ifdef EXT2_COMPR_REPORT
+ {
+ int j;
+ printk
+ (" > > > ext2_decompress_cluster %d: inode=%ld, size=%d nbh=%d\n",
+ cluster, inode->i_ino, (int) inode->i_size, nbh);
+#ifdef EXT2_COMPR_REPORT_VERBOSE
+ for (j = 0; j < nbh; j++) {
+ if (bh[j]) {
+ printk("0buffer_head[%d]: blocknr=%lu, addr=%p \n", j,
+ (unsigned long) bh[j]->b_blocknr, bh[j]);
+ if (bh[j]->b_page)
+ printk("0:[page->index=%ld]\n", bh[j]->b_page->index);
+ else
+ printk("[No page]\n");
+ } else
+ printk("buffer_head[%d] is NULL\n", j);
+ }
+ while ((j < EXT2_MAX_CLUSTER_BLOCKS) && (bh[j] != NULL) && bh[j]->b_blocknr) { /*Add by Yabo Ding */
+ printk
+ ("buffer_head[%d] is free but not NULL: blocknr=%lu, addr=%p\n",
+ j, (unsigned long) bh[j]->b_blocknr, bh[j]);
+ j++;
+ }
+#endif
+ }
+#endif
+ for (i = 0; i < nbh; i++)
+ assert(bh[i]->b_blocknr != 0);
+
+ restore_b_data_himem(bh[0]);
+
+ head = (struct ext2_cluster_head *) bh[0]->b_data;
+ if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) {
+ ext2_msg(inode->i_sb,
+ "ext2_decompress_cluster: bad magic number",
+ "cluster %d: inode = %lu, magic = %#04x",
+ cluster, inode->i_ino, le16_to_cpu(head->magic));
+ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL;
+ result = -EXT2_ECOMPR;
+ goto out_err;
+ }
+	if (le32_to_cpu(head->ulen) <=
+	    (c_nblk << inode->i_sb->s_blocksize_bits)) {
+ ext2_error(inode->i_sb, "ext2_decompress_cluster",
+ "ulen too small for c_nblk. ulen=%u, c_nblk=%u, bs=%lu",
+ le32_to_cpu(head->ulen), c_nblk,
+ inode->i_sb->s_blocksize);
+ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL;
+ result = -EXT2_ECOMPR;
+ goto out_err;
+ }
+ free_ix =
+ ext2_calc_free_ix(head->holemap_nbytes, (u8 const *) (&head[1]),
+ c_nblk);
+ clu_block0 = ext2_cluster_block0(inode, cluster);
+ clu_nblocks = ext2_cluster_nblocks(inode, cluster);
+ ext2_unpack_blkaddrs(inode, bh, 1,
+ head->holemap_nbytes, (u8 const *) (&head[1]),
+ c_nblk, free_ix, clu_block0, clu_nblocks);
+
+ /* Allocate the extra blocks needed. */
+ {
+ int data_left = le32_to_cpu(head->ulen);
+
+ data_left -= c_nblk << inode->i_sb->s_blocksize_bits;
+ assert(data_left > 0);
+ for (i = free_ix; i < clu_nblocks; i++)
+ if (((i >> 3) >= head->holemap_nbytes)
+ || !(head->holemap[i >> 3] & (1 << (i & 7)))) {
+ result = ext2_get_block(inode,
+ clu_block0 + i,
+ bh[i], 1 /* create */ );
+ if (bh[i]->b_blocknr == 0)
+ goto out_err;
+				d_npg = (i >> (PAGE_CACHE_SHIFT -
+					       inode->i_sb->s_blocksize_bits)) + 1;
+ nbh++;
+ data_left -= inode->i_sb->s_blocksize;
+ if (data_left <= 0)
+ break;
+ }
+ }
+
+ /* jmr 1998-10-28 Hope this is the last time I'm moving this code.
+ * Module loading must be done _before_ we lock wa, just think what
+ * can happen if we reallocate wa when somebody else uses it...
+ */
+ {
+ unsigned meth;
+#ifdef CONFIG_KMOD
+ unsigned alg;
+#endif
+
+ meth = head->method; /* only a byte, so no swabbing needed. */
+ if (meth >= EXT2_N_METHODS) {
+			ext2_msg(inode->i_sb,
+				 "Assertion failed: illegal method id",
+				 "inode = %lu, id = %u", inode->i_ino, meth);
+ result = -EXT2_ECOMPR;
+ goto out_err;
+ }
+#ifdef CONFIG_KMOD
+ alg = ext2_method_table[meth].alg;
+ if (!ext2_algorithm_table[alg].avail) {
+ char str[32];
+
+ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name);
+ request_module(str);
+ }
+#endif
+ }
+
+ result = -EINTR;
+
+ /*
+ * Then, decompress and copy back data.
+ */
+ {
+ int ic;
+
+ for (ic = 0, i = 0; i < clu_nblocks; i++) {
+ if (bh[i]->b_blocknr != 0) {
+ bhc[ic] = bh[i];
+ ic++;
+ if (ic == c_nblk) {
+ break;
+ }
+ }
+ }
+ }
+
+
+#ifdef EXT2_COMPR_REPORT_WA
+ printk(KERN_DEBUG "pid %d locks wa\n", current->pid);
+#endif
+ if (get_cpu_var(ext2_rd_wa) == NULL)
+ {
+ ext2_alloc_rd_wa();
+ }
+ assert(__get_cpu_var(ext2_rd_wa) != NULL);
+
+ result = ext2_decompress_blocks(inode, bhc, c_nblk,
+ le32_to_cpu(head->ulen), cluster);
+ if (result != (int) le32_to_cpu(head->ulen)) {
+ if (result >= 0) {
+ /* I think this is impossible, as
+ ext2_decompress_blocks() checks against
+ head->ulen. */
+ printk(KERN_WARNING "Unexpected return value %d "
+ "from ext2_decompress_blocks()\n", result);
+ result = -EXT2_ECOMPR;
+ }
+
+#ifdef EXT2_COMPR_REPORT_WA
+ printk(KERN_DEBUG "pid %d unlocks wa\n", current->pid);
+#endif
+ put_cpu_var(ext2_rd_wa);
+ goto out_err;
+ }
+
+#ifdef EXT2_COMPR_REPORT
+ printk(KERN_DEBUG "ext2: %04x:%lu: cluster %d+%d [%d] "
+ "decompressed into %d bytes\n",
+ inode->i_rdev,
+ inode->i_ino, clu_block0, clu_nblocks, c_nblk, result);
+#endif
+
+ /* Copy back decompressed data. */
+ {
+ int count = result;
+ unsigned char const *src;
+ int c, p;
+ int cbh;
+ int n; /* block index in page */
+ struct buffer_head *bp;
+ unsigned addr0, b_start, b_end;
+
+ assert(count > 0);
+ if (d_npg == -1) {
+ d_npg = ((count - 1) >> PAGE_CACHE_SHIFT) + 1;
+ }
+#ifdef EXT2_COMPR_REPORT_CPR
+ trace_e2c
+ ("ext2_decompress_cluster: cnt=%d free_ix=%d d_npg=%d nbh=%d\n",
+ count, free_ix, d_npg, nbh);
+#endif
+ result = -EXT2_ECOMPR;
+ src = __get_cpu_var(ext2_rd_wa)->u;
+ cbh = 0;
+ for (c = 0; c < clu_nblocks; c++) {
+
+ if (bh[c]->b_blocknr == 0) {
+#ifdef EXT2_COMPR_REPORT_CPR
+ trace_e2c("\t clear buf %d sts=0x%x\n", c,
+ (int) bh[c]->b_state);
+#endif
+ restore_b_data_himem(bh[c]);
+ memset(bh[c]->b_data, 0, inode->i_sb->s_blocksize);
+ continue;
+ }
+ if (cbh >= (nbh - 1)) {
+ break;
+ }
+ if (count < inode->i_sb->s_blocksize) {
+ put_cpu_var(ext2_rd_wa);
+ goto out_err;
+ }
+ cbh++;
+ count -= inode->i_sb->s_blocksize;
+ p = c >> (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+ if (!PageUptodate(pg[p])) {
+ addr0 = (clu_block0 << inode->i_sb->s_blocksize_bits);
+ b_start = addr0 + (c << inode->i_sb->s_blocksize_bits);
+ b_end = b_start + inode->i_sb->s_blocksize;
+#ifdef EXT2_COMPR_REPORT_CPR
+ trace_e2c("\t[%d] sts=0x%x e=%d s=%d sz=%d pg:%lu(%#x)\n",
+ c, (int) bh[c]->b_state, b_end, b_start,
+ (int) inode->i_size, pg[p]->index,
+ (unsigned int) pg[p]);
+#endif
+ if (b_end <= inode->i_size) {
+ /* Block is before end of file, copy data */
+ restore_b_data_himem(bh[c]);
+ memcpy(bh[c]->b_data, src, inode->i_sb->s_blocksize);
+
+ } else if (b_start < inode->i_size) {
+ /* Block contains end of file, copy to end */
+ restore_b_data_himem(bh[c]);
+ memcpy(bh[c]->b_data, src, inode->i_size - b_start);
+
+ }
+ set_buffer_uptodate(bh[c]);
+ set_buffer_dirty(bh[c]);
+ bh_writeout[bhn_writeout] = bh[c]; //mw
+ bhn_writeout++; //mw
+ } else {
+			/* mw: DEBUG: the buffer is uptodate now; compress will
+			 * not re-read it and would get the compressed data!!!
+			 * Clear the flag in the extra page:
+			 * clear_bit(BH_Uptodate, &bh[c]->b_state); */
+
+			n = c & ((PAGE_CACHE_SIZE - 1) >>
+				 inode->i_sb->s_blocksize_bits);
+ bp = page_buffers(pg[p]);
+ for (i = 0; i < n; i++) {
+ bp = bp->b_this_page;
+ }
+ result = ext2_get_block(inode, clu_block0 + c, bp, 0);
+
+ //mw: needed to do a writeback of the non-epg-buffers
+ //no idea how it was done before
+ set_buffer_uptodate(bp);
+ set_buffer_dirty(bp);
+ bh_writeout[bhn_writeout] = bp; //mw
+ bhn_writeout++; //mw
+
+ if (bp->b_blocknr == 0) {
+ put_cpu_var(ext2_rd_wa);
+ goto out_err;
+ }
+ assert(bp->b_blocknr == bh[c]->b_blocknr);
+ }
+ src += inode->i_sb->s_blocksize;
+ }
+ if (count > inode->i_sb->s_blocksize) {
+ put_cpu_var(ext2_rd_wa);
+ goto out_err;
+ }
+ p = c >> (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+ if (!PageUptodate(pg[p])) {
+ addr0 = (clu_block0 << inode->i_sb->s_blocksize_bits);
+ b_start = addr0 + (c << inode->i_sb->s_blocksize_bits);
+#ifdef EXT2_COMPR_REPORT_CPR
+ trace_e2c("\t[%d] sts=0x%x c=%d s=%d sz=%d pg:%lu(%#x)\n", c,
+ (int) bh[c]->b_state, count, b_start,
+ (int) inode->i_size, pg[p]->index,
+ (unsigned int) pg[p]);
+#endif
+ if (b_start >= inode->i_size) {
+ restore_b_data_himem(bh[c]);
+ memset(bh[c]->b_data, 0, inode->i_sb->s_blocksize);
+
+ } else {
+ if ((inode->i_size - b_start) < count) {
+ restore_b_data_himem(bh[c]);
+ memcpy(bh[c]->b_data, src, inode->i_size - b_start);
+ memset(bh[c]->b_data + (inode->i_size - b_start), 0,
+ count - (inode->i_size - b_start));
+ } else {
+ restore_b_data_himem(bh[c]);
+ memcpy(bh[c]->b_data, src, count);
+ }
+ }
+ set_buffer_uptodate(bh[c]);
+ set_buffer_dirty(bh[c]);
+ bh_writeout[bhn_writeout] = bh[c]; //mw
+ bhn_writeout++; //mw
+ } else {
+ assert(epg[p] != NULL); //mw
+			n = c & ((PAGE_CACHE_SIZE - 1) >>
+				 inode->i_sb->s_blocksize_bits);
+ bp = page_buffers(pg[p]);
+ for (i = 0; i < n; i++) {
+ bp = bp->b_this_page;
+ }
+ result = ext2_get_block(inode, clu_block0 + c, bp, 0);
+
+ //mw: needed to do a writeback of the non-epg-buffers
+ //no idea how it was done before
+ set_buffer_uptodate(bp);
+ set_buffer_dirty(bp);
+ bh_writeout[bhn_writeout] = bp; //mw
+ bhn_writeout++; //mw
+ if (bp->b_blocknr == 0) {
+ put_cpu_var(ext2_rd_wa);
+ goto out_err;
+ }
+ assert(bp->b_blocknr == bh[c]->b_blocknr);
+ }
+ result = (nbh - 1) * inode->i_sb->s_blocksize + count;
+ }
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (pg[i] == NULL)
+ break;
+ if (i < d_npg)
+ SetPageUptodate(pg[i]);
+ }
+
+#ifdef EXT2_COMPR_REPORT_WA
+ printk(KERN_DEBUG "pid %d unlocks wa\n", current->pid);
+#endif
+ put_cpu_var(ext2_rd_wa);
+
+ inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+ /* If needed, EXT2_DIRTY_FL is raised by the caller. */
+
+#if 0
+ /* TODO: SYNC */
+ if (IS_SYNC(inode)) {
+ generic_osync_inode(inode, inode->i_mapping,
+ OSYNC_METADATA | OSYNC_DATA);
+ }
+#endif
+ assert(result >= 0);
+
+ //Sync out changes:
+ assert(bhn_writeout <= EXT2_MAX_CLUSTER_BLOCKS);
+ assert(bhn_writeout >= 0);
+
+ //mw: debug
+ for (i = 0; i < bhn_writeout; i++) {
+ if ((!buffer_mapped(bh_writeout[i]))
+ || (bh_writeout[i]->b_bdev == NULL)) {
+ u32 block = ext2_cluster_block0(inode, cluster);
+ ext2_get_block(inode, block + i, bh_writeout[i], 1);
+ //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh_writeout[i]->b_blocknr, (bh_writeout[i]->b_state & BH_Mapped), (bh_writeout[i]->b_page ? bh_writeout[i]->b_page->index : 0), bh_writeout[i]->b_bdev );
+ }
+ assert(buffer_mapped(bh_writeout[i]));
+ assert(bh_writeout[i]->b_bdev != NULL);
+ assert(bh_writeout[i]->b_bdev == inode->i_sb->s_bdev);
+ /*if (bh_writeout[i]->b_bdev == NULL)
+ bh_writeout[i]->b_bdev = inode->i_sb->s_bdev; //fix bdev-bug */
+ }
+
+ ll_rw_block(WRITE, bhn_writeout, bh_writeout);
+ //mw: seems we have to wait here, otherwise: crash!
+
+ CHECK_NOT_ATOMIC
+ for (i = 0; i < bhn_writeout; i++) {
+ if (bh_writeout[i])
+ wait_on_buffer(bh_writeout[i]);
+ }
+ goto cleanup;
+
+ out_err:
+	printk(KERN_ERR "ext2: error decompressing cluster: err=%i\n", result);
+
+ cleanup:
+
+#ifdef CONFIG_HIGHMEM
+ if (kmapped)
+ ext2_kunmap_cluster_pages(NULL, pg, epg);
+#endif
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (pg[i] == NULL)
+ break;
+ unlock_page(pg[i]);
+ page_cache_release(pg[i]);
+ }
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (epg[i] != NULL) {
+ ClearPageDirty(epg[i]);
+ ClearPageUptodate(epg[i]);
+ try_to_free_buffers(epg[i]);
+ unlock_page(epg[i]);
+ assert(page_count(epg[i]) == 1);
+ page_cache_release(epg[i]);
+ }
+ }
+
+ /*
+ * Release buffers, don't forget to unlock the locked ones.
+ * pjm 1998-01-14: TO_DO: Locked ones?
+ */
+ assert(nbh >= 0);
+ assert(nbh <= EXT2_MAX_CLUSTER_BLOCKS);
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c(" < < < ext2_decompress_cluster %d: inode=%ld, res=%i\n",
+ cluster, inode->i_ino, result);
+#endif
+ return result;
+}
+
+
+/*
+ * Function to decompress the pages of a cluster.
+ *
+ * Allocates buffers for pages that are not mapped on the device.
+ *
+ * Returns the size of the decompressed data on success, a negative
+ * value in case of failure, or 0 if some pages are not uptodate.
+ *
+ * The inode is supposed to be writable.
+ * All the pages must be UPTODATE.
+ */
+int ext2_decompress_pages(struct inode *inode, u32 cluster,
+ struct page *pg[])
+{
+ struct ext2_cluster_head *head;
+ struct buffer_head *bh0;
+ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS];
+ unsigned nbh, c_nblk;
+ unsigned free_ix, clu_block0, clu_nblocks;
+ int i, pagesPerCluster, data_left, size = 0;
+ long status = 0;
+ char *dp;
+ struct buffer_head *bh_writeout[EXT2_MAX_CLUSTER_BLOCKS];
+ int bhn_writeout;
+#ifdef CONFIG_HIGHMEM
+ int kmapped = 0;
+
+ ext2_kmap_cluster_pages(NULL, pg, NULL);
+ kmapped = 1;
+#endif
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++) {
+ bh_writeout[i] = NULL;
+ bhn_writeout = 0;
+ }
+
+	/* First, get the cluster head.  (For this, we need to re-read the first
+	   block of the cluster without overwriting the data of the page the
+	   buffer points to...) */
+	/* This assumes that clusters are aligned with PAGE_SIZE...  To be improved. */
+
+	/* Changed by Yabo Ding<bobfree_cn@yahoo.com.cn>,<yding@wyse.com>
+	   In 2.6.x the old code cannot re-read data from disk into a buffer
+	   whose data pointer has been changed.
+	   So, copy the (decompressed) memory data to a temporary buffer;
+	   then re-read the (compressed) data from disk, and copy it to head;
+	   then copy the memory data back from the temporary buffer.
+	   It seems clumsy, but it works well.
+	 */
+
+ bh0 = page_buffers(pg[0]);
+ restore_b_data_himem(bh0);
+
+ head = (struct ext2_cluster_head *) kmalloc(bh0->b_size, GFP_KERNEL);
+ if (head == NULL) {
+ ext2_msg(inode->i_sb, "no more memory", "inode = %lu",
+ inode->i_ino);
+ status = -EIO;
+ goto out_x;
+ }
+ dp = kmalloc(bh0->b_size, GFP_KERNEL);
+ if (dp == NULL) {
+ ext2_msg(inode->i_sb, "no more memory", "inode = %lu",
+ inode->i_ino);
+ kfree(head);
+ status = -EIO;
+ goto out_x;
+ }
+ memcpy(dp, bh0->b_data, bh0->b_size);
+ clear_bit(BH_Uptodate, &bh0->b_state);
+ if (!buffer_mapped(bh0)) {
+ status =
+ ext2_get_block(inode, ext2_cluster_block0(inode, cluster), bh0,
+ 0);
+ if (bh0->b_blocknr == 0) {
+ trace_e2c
+ ("ext2_decompress_pages: ext2_get_block error %ld (cluster = %u)\n",
+ status, cluster);
+ kfree(head);
+ memcpy(bh0->b_data, dp, bh0->b_size);
+ kfree(dp);
+ status = -EIO;
+ goto out;
+ }
+ }
+ ll_rw_block(READ, 1, &bh0);
+
+ CHECK_NOT_ATOMIC
+ wait_on_buffer(bh0);
+ //printk("RE-Read: Buffer: blocknr:%lu(%#x) \n", bh0->b_blocknr, bh0);
+ if (!buffer_uptodate(bh0)) { /* Read error ??? */
+ trace_e2c("ext2_decompress_pages: IO error (cluster = %u)\n",
+ cluster);
+ kfree(head);
+ memcpy(bh0->b_data, dp, bh0->b_size);
+ kfree(dp);
+ status = -EIO;
+ goto out;
+ }
+	/* This assumes that clusters are aligned with PAGE_SIZE...  To be improved:
+	   bh0->b_data = page_address(pg[0]); */
+ memcpy((char *) head, bh0->b_data, bh0->b_size);
+ memcpy(bh0->b_data, dp, bh0->b_size);
+ kfree(dp);
+
+ if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) {
+ ext2_msg(inode->i_sb,
+ "ext2_decompress_pages: bad magic number",
+ "inode = %lu, magic = %#04x", inode->i_ino,
+ le16_to_cpu(head->magic));
+ kfree(head);
+ status = -EIO;
+ goto out;
+ }
+#ifdef EXT2_COMPR_REPORT
+	trace_e2c("ext2_decompress_pages: clt=%d i=%ld head=%p\n", cluster,
+		  inode->i_ino, head);
+#endif
+
+ /* Now, try to do the same as in ext2_decompress_cluster for moving/allocating blocks */
+ nbh = 0;
+ pagesPerCluster = ext2_cluster_npages(inode, cluster);
+ for (i = 0; i < pagesPerCluster && pg[i]; i++) {
+ assert(PageLocked(pg[i]));
+ //if (!(PageUptodate(pg[i]))) {
+ //mw: do it like ext2_decompress_cluster to handle end of a file correctly
+ if (!(PageUptodate(pg[i]))
+ && (pg[i]->index <= ((inode->i_size - 1) >> PAGE_CACHE_SHIFT))) {
+ kfree(head);
+			printk(KERN_WARNING "ext2_decompress_pages: should never happen: not all pages uptodate!\n");	//mw
+ status = 0;
+ goto out_x;
+ }
+ }
+
+ for (i = 0; i < pagesPerCluster && pg[i]; i++) {
+ struct buffer_head *bhead, *bhx;
+ int idx = 0;
+
+ /* assert(PageUptodate(pg[i])); with ftruncate() can be false */
+ if (!page_has_buffers(pg[i])) {
+ ClearPageUptodate(pg[i]); /*mw */
+ ClearPageDirty(pg[i]); /*mw */
+ assert(0);
+ create_empty_buffers_e2c(pg[i], inode->i_sb->s_blocksize, 0,
+ inode);
+ if (unlikely(!page_has_buffers(pg[i])))
+ printk("Error: NOMEM!\n");
+ }
+ bhead = page_buffers(pg[i]);
+ for (bhx = bhead; bhx != bhead || !idx; bhx = bhx->b_this_page) {
+ idx++;
+ bh[nbh] = bhx;
+ nbh++;
+ }
+ }
+
+ while ((nbh != 0) && (bh[nbh - 1]->b_blocknr == 0))
+ --nbh;
+
+ c_nblk = nbh;
+
+ free_ix =
+ ext2_calc_free_ix(head->holemap_nbytes, (u8 const *) (&head[1]),
+ c_nblk);
+ clu_block0 = ext2_cluster_block0(inode, cluster);
+ clu_nblocks = ext2_cluster_nblocks(inode, cluster);
+ ext2_unpack_blkaddrs(inode, bh, 0, head->holemap_nbytes,
+ (u8 const *) (&head[1]), c_nblk, free_ix,
+ clu_block0, clu_nblocks);
+
+ /* Allocate the extra blocks needed. */
+ data_left = size = le32_to_cpu(head->ulen);
+
+ data_left -= c_nblk << inode->i_sb->s_blocksize_bits;
+ assert(data_left > 0);
+ for (i = 0; i < free_ix; i++) {
+ if (bh[i]->b_blocknr != 0) {
+#ifdef EXT2_COMPR_REPORT_CPR
+ trace_e2c("\t [%d] blk=%ld sts=0x%x\n", i, bh[i]->b_blocknr,
+ (int) bh[i]->b_state);
+#endif
+ set_buffer_dirty(bh[i]);
+ bh_writeout[bhn_writeout] = bh[i]; //mw
+ bhn_writeout++; //mw
+ }
+ }
+
+ for (i = free_ix; i < clu_nblocks; i++) {
+ if (((i >> 3) >= head->holemap_nbytes)
+ || !(head->holemap[i >> 3] & (1 << (i & 7)))) {
+ status =
+ ext2_get_block(inode, clu_block0 + i, bh[i],
+ 1 /* create */ );
+ if (status || bh[i]->b_blocknr == 0) {
+ status = -EIO;
+ goto out;
+ }
+#ifdef EXT2_COMPR_REPORT_CPR
+ trace_e2c("\t [%d] blk=%ld sts=0x%x\n", i, bh[i]->b_blocknr,
+ (int) bh[i]->b_state);
+#endif
+ set_bit(BH_Uptodate, &bh[i]->b_state);
+ set_buffer_dirty(bh[i]);
+ bh_writeout[bhn_writeout] = bh[i]; //mw
+ bhn_writeout++; //mw
+ nbh++;
+ data_left -= inode->i_sb->s_blocksize;
+ if (data_left <= 0)
+ break;
+ }
+ }
+
+ out:
+ kfree(head);
+
+ out_x:
+
+ for (i = 0; i < bhn_writeout; i++) {
+
+ if ((!buffer_mapped(bh_writeout[i]))
+ || (bh_writeout[i]->b_bdev == NULL)) {
+ u32 block = ext2_cluster_block0(inode, cluster);
+ ext2_get_block(inode, block + i, bh_writeout[i], 1);
+ //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh_writeout[i]->b_blocknr, (bh_writeout[i]->b_state & BH_Mapped), (bh_writeout[i]->b_page ? bh_writeout[i]->b_page->index : 0), bh_writeout[i]->b_bdev );
+ }
+ assert(buffer_mapped(bh_writeout[i]));
+ assert(bh_writeout[i]->b_bdev != NULL);
+ assert(bh_writeout[i]->b_bdev == inode->i_sb->s_bdev);
+ /*if (bh_writeout[i]->b_bdev == NULL)
+ bh_writeout[i]->b_bdev = inode->i_sb->s_bdev; //fix bdev-bug */
+ }
+ //Sync out changes:
+ ll_rw_block(WRITE, bhn_writeout, bh_writeout);
+ //mw: seems we have to wait here, otherwise: crash!
+
+ CHECK_NOT_ATOMIC
+ for (i = 0; i < bhn_writeout; i++) {
+ if (bh_writeout[i])
+ wait_on_buffer(bh_writeout[i]);
+ }
+
+
+#ifdef CONFIG_HIGHMEM
+ if (kmapped)
+ ext2_kunmap_cluster_pages(NULL, pg, NULL);
+#endif
+
+ return (status ? status : size);
+}
+
+
+/* Decompress every cluster that is still compressed.
+ We stop and return -ENOSPC if we run out of space on device.
+
+ The caller needs to check for EXT2_COMPRBLK_FL before calling.
+
+ Returns 0 on success, -errno on failure.
+
+ Called by ext2_ioctl(). */
+int ext2_decompress_inode(struct inode *inode)
+{
+ u32 cluster;
+ u32 n_clusters;
+ int err = 0;
+ struct ext2_inode_info *ei = EXT2_I(inode);
+
+ assert(ei->i_flags & EXT2_COMPRBLK_FL);
+
+ /* Quotas aren't otherwise kept if file is opened O_RDONLY. */
+ dquot_initialize(inode);
+
+ //mutex_lock(&inode->i_mutex); /* MW 5-16-07 */
+ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */
+ err = 0;
+ /* This test can succeed because down() (and I think DQUOT_INIT) can block. */
+ if (!(ei->i_flags & EXT2_COMPRBLK_FL))
+ goto out;
+
+ n_clusters = ext2_n_clusters(inode);
+ for (cluster = 0; cluster < n_clusters; cluster++) {
+ err = ext2_cluster_is_compressed_fn(inode, cluster);
+ if (err > 0) {
+ err = ext2_decompress_cluster(inode, cluster);
+ /* If we later get an error, we'll need to recompress. */
+ ei->i_flags |= EXT2_DIRTY_FL;
+ ei->i_compr_flags |= EXT2_CLEANUP_FL;
+ }
+ if (err < 0)
+ goto error;
+ }
+ assert(err >= 0);
+ err = 0;
+ ei->i_flags &= ~(EXT2_COMPRBLK_FL | EXT2_DIRTY_FL);
+ ei->i_compr_flags &= ~EXT2_CLEANUP_FL;
+ error:
+ inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+ out:
+// mutex_unlock(&inode->i_mutex); /* MW 5-16-07 */
+ return err;
+}
+
+
+/*
+   TODO: SECRM_FL
+
+   TODO: Before freeing the blocks, check whether the (reference) counter
+   is 1, and mark the inode if it is not (in order to prepare for
+   immediate recompression).  [pjm's translation of the original
+   French note.] */
+
+/* This is called by ext2_compress_cluster to free the blocks now
+ available due to compression. We free ,nb` blocks beginning with
+ block ,block`. We set the address of each freed block to
+ EXT2_COMPRESSED_BLKADDR, thus marking the cluster as compressed.
+ N.B. It is up to the caller to adjust i_blocks. */
+
+/* TODO: ext2_truncate() is much more careful than this routine.
+ (E.g. it checks for bh->b_count > 1, and checks for things changing
+ underneath it. It also calls bforget instead of brelse if it's
+ going to free it.) Why? Maybe we should copy it. */
+
+/* effic: Reduce the number of calls to ext2_free_block() the way
+ ext2_trunc_direct() does. */
+
+/* fixme: I think that we do indeed need to check whether buffers are held by
+   somebody else before freeing them. */
+static int ext2_free_cluster_blocks(struct inode *inode, u32 block,
+ unsigned nb)
+{
+ u32 tmp;
+ struct ext2_bkey key;
+ int err;
+
+/*
+ * whitpa 04 Oct 2004: although it may be true that using e2compr in
+ * conjunction with quotas is a Bad Idea, having quotas enabled for other
+ * filesystems doesn't necessarily mean that the quota feature will actually be
+ * used in this one, so many people find the following assertion very annoying.
+ * I have therefore disabled it.
+ */
+/* assert (!inode->i_sb->dq_op || (inode->i_flags & S_QUOTA)); */
+ if (!nb)
+ return 0;
+ if (nb > EXT2_MAX_CLU_NBLOCKS) {
+ assert((int) nb >= 0);
+ assert(nb <= EXT2_MAX_CLU_NBLOCKS);
+ return -EDOM;
+ }
+ assert(((block + nb) & 3) == 0);
+ if (!ext2_get_key(&key, inode, block))
+ return -EIO;
+
+ while (nb-- > 0) {
+ tmp = ext2_get_key_blkaddr(&key);
+ err = ext2_set_key_blkaddr(&key, EXT2_COMPRESSED_BLKADDR);
+ if (err)
+ goto out;
+ if (tmp != 0) {
+ assert(tmp != EXT2_COMPRESSED_BLKADDR);
+#ifdef EXT2_COMPR_REPORT_ALLOC
+ printk(KERN_DEBUG "ext2: free %d = (%d) %d:%d:%d:%d : %d\n",
+ key.block,
+ key.level,
+ key.off[0], key.off[1], key.off[2], key.off[3], tmp);
+#endif
+ ext2_free_blocks(inode, tmp, 1);
+ }
+ if (!ext2_next_key(&key, 1))
+ break;
+ }
+ err = 0;
+ out:
+ ext2_free_key(&key);
+ return err;
+}
+
+#ifdef EXT2_COMPR_DEBUG
+static unsigned count_bits(unsigned char *p, unsigned nb)
+{
+ u32 x = le32_to_cpu(*(u32 *) p);
+ unsigned n = 0;
+
+ assert(nb <= 4);
+ if (nb != 4)
+ x &= (1 << (nb * 8)) - 1;
+ while (x) {
+ x &= (x - 1);
+ n++;
+ }
+ return n;
+}
+#endif
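+
+/* aside: count_bits() is Kernighan's popcount: x &= (x - 1) clears the
+ * lowest set bit per iteration, e.g. x = 0b1011 -> 0b1010 -> 0b1000 -> 0,
+ * so n = 3 after three iterations rather than one per bit position. */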
+
+/*
+ * __remove_compr_assoc_queue is used in invalidate_inode_buffers
+ * replacement code for ext2_compress_cluster(). TLL 02/21/07
+ * Yeah, it is duplicate code, but using it does not require
+ * patching fs/buffer.c/__remove_assoc_queue to export it.
+ * The buffer's backing address_space's private_lock must be held.
+ */
+/*static inline void __remove_compr_assoc_queue(struct buffer_head *bh)
+{
+ list_del_init(&bh->b_assoc_buffers);
+}*/
+
+/* Compress one cluster. If the cluster uses fewer blocks once
+ compressed, it is stored in place of the original data. Unused
+ blocks are freed, and the cluster is marked as compressed.
+
+ Returns a negative value on error,
+ 0 if the cluster does not compress well,
+ positive if it is compressed (whether it was already compressed
+ or whether we compressed it).
+
+ Assume inode is writable.
+
+ Called by :
+
+ ext2_cleanup_compressed_inode () [i_sem]
+
+ If ever we acquire new callers, make sure that quotas are
+ initialised, and COMPRBLK is handled correctly (i.e. such
+ that ioctl() can't change the cluster size on us), and that caller
+ tests for ext2_wa==NULL.
+*/
+
+int ext2_compress_cluster(struct inode *inode, u32 cluster)
+{
+ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS + 1];
+ struct page *pg[EXT2_MAX_CLUSTER_PAGES];
+ int s_nblk; /* Equals clu_nblocks less any trailing hole blocks. */
+ unsigned u_nblk = (~(unsigned) 0), c_nblk; /* Number of blocks occupied by
+ un/compressed data. */
+ int result, n, x;
+ int ulen, maxlen = 0, clen = 0;
+ unsigned char *dst;
+ u8 *src;
+ unsigned meth, alg;
+ int nbh = 0, npg, i;
+ unsigned char holemap_nbytes = 0;
+ unsigned last_hole_pos;
+ struct ext2_cluster_head *head;
+ unsigned r_nblk;
+ struct ext2_inode_info *ei = EXT2_I(inode);
+ unsigned long saved_isize;
+ //int dotrunc = 1; //mw
+
+#ifdef CONFIG_HIGHMEM
+ int kmapped = 0;
+#endif
+
+ /* impl: Otherwise, ioctl() could change the cluster size
+ beneath us. */
+ /* TLL say not compressed and return -1 6-15-07 */
+ if (!(ei->i_flags & EXT2_COMPRBLK_FL))
+ return -1;
+
+ //mw
+ saved_isize = inode->i_size;
+
+ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */
+ assert(!mapping_mapped(inode->i_mapping));
+
+ npg = ext2_cluster_npages(inode, cluster);
+
+ result = ext2_get_cluster_pages(inode, cluster, pg, NULL, 1);
+ if (result <= 0)
+ goto done;
+
+#ifdef CONFIG_HIGHMEM
+ ext2_kmap_cluster_pages(NULL, pg, NULL);
+ kmapped = 1;
+#endif
+
+ /* effic: We ought to use the page cache. Using the page
+ cache always costs extra CPU time, but saves I/O if the
+ page is present. We still need to detect holes, which
+ unfortunately may still cause I/O. Testing for all-zero
+ could save us that I/O. */
+
+ nbh = ext2_get_cluster_blocks(inode, cluster, bh, pg, NULL, 1);
+
+ s_nblk = nbh;
+
+#ifdef EXT2_COMPR_REPORT
+ {
+ int i;
+ trace_e2c(" > > > ext2_compress_cluster %d: inode=%ld, size=%d\n",
+ cluster, inode->i_ino, (int) inode->i_size);
+#ifdef EXT2_COMPR_REPORT_CPR
+ for (i = 0; i < s_nblk; i++) {
+ if (bh[i]) {
+ printk(KERN_DEBUG
+ "bbuffer_head[%d]: blocknr=%lu, addr=0x%p ", i,
+ (unsigned long) bh[i]->b_blocknr, bh[i]);
+ if (bh[i]->b_page)
+ printk(KERN_DEBUG "bgn:[page->index=%ld]\n",
+ bh[i]->b_page->index);
+ else
+ printk(KERN_DEBUG "[No page]\n");
+ } else
+ printk("bbuffer_head[%d] is NULL\n", i);
+ }
+#endif
+ }
+#endif
+ /*
+ * Did somebody else compress the cluster while we were waiting ?
+ * This should never arise ...
+ */
+ result = ext2_cluster_is_compressed_fn(inode, cluster);
+ if (result != 0) {
+ if (result > 0) {
+ ext2_msg(inode->i_sb,
+ "ext2_compress_cluster",
+ "compressing compressed cluster");
+ }
+ goto done;
+ }
+
+	/* I moved it here in case we need to load a module that
+	 * needs more heap than is currently allocated.
+	 * In that case "init_module" for that algorithm forces
+	 * re-allocation of ext2_wa.  It should be safe here b/c the
+	 * first reference to ext2_wa comes just after, and we have
+	 * locked ext2_wa before.
+	 *
+	 * I know that we may not need the compression at all
+	 * (compressing 0 or 1 block) but it's better to sacrifice
+	 * a bit than to make a total mess of this code.
+	 *
+	 * FIXME: Totally separate working areas for reading and writing.
+	 * Jan R.
+	 */
+
+ meth = ei->i_compr_method;
+ assert(meth < EXT2_N_METHODS);
+ alg = ext2_method_table[meth].alg;
+#ifdef CONFIG_KMOD
+ if (!ext2_algorithm_table[alg].avail) {
+ char str[32];
+
+ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name);
+ request_module(str);
+ }
+#endif
+
+ result = -EINTR;
+
+ /*
+ * Try to get the working area.
+ */
+#ifdef EXT2_COMPR_REPORT_WA
+ printk(KERN_DEBUG "pid %d enters critical region\n", current->pid);
+#endif
+ if (get_cpu_var(ext2_wr_wa) == NULL)
+ {
+ ext2_alloc_wr_wa();
+ }
+ assert(__get_cpu_var(ext2_wr_wa) != NULL);
+
+
+ /*
+ * Now, we try to compress the cluster. If the cluster does
+ * not compress well, we just give up. Otherwise, we reuse
+ * the old blocks to store the compressed data (except that
+ * compressed data is contiguous in the file even if the
+ * uncompressed data had holes).
+ */
+
+ /*
+ * Compute the block bitmap, how many bytes of data we have
+ * in the cluster, and the maximum interesting length after
+ * compression. The bitmap will be used to reallocate blocks
+ * when decompressing the cluster, so that we don't create blocks
+ * that were previously missing. We also pack the buffers
+ * together.
+ */
+
+ head = (struct ext2_cluster_head *) __get_cpu_var(ext2_wr_wa)->c;
+#if EXT2_MAX_CLUSTER_BLOCKS > 32
+# error "We need to zero more bits than this."
+#endif
+ *(u32 *) (&head[1]) = 0;
+ last_hole_pos = (unsigned) (-1);
+ assert(head->holemap[0] == 0);
+ assert(head->holemap[1] == 0);
+ assert(head->holemap[2] == 0);
+ assert(head->holemap[3] == 0);
+ assert(*(u32 *) head->holemap == 0);
+ assert(count_bits(head->holemap, 4) == 0);
+
+ /* TODO: Check that i_size can't change beneath us.
+ do_truncate() is safe because it uses i_sem around changing
+ i_size. For the moment, I do a runtime check. */
+
+ saved_isize = inode->i_size;
+
+#ifdef EXT2_COMPR_REPORT_VERBOSE
+ printk
+ ("00 ext2_compress_cluster[%u]: i_size=%u, s_blocksize_bits=%u, s_nblk=%u\n",
+ __LINE__, (unsigned) inode->i_size, inode->i_sb->s_blocksize_bits,
+ s_nblk);
+#endif
+// assert (ROUNDUP_RSHIFT(inode->i_size, inode->i_sb->s_blocksize_bits)
+// >= s_nblk);
+ /* This initial guess at ulen doesn't take holes into account
+ unless they're at end of cluster. We ,compensate for other
+ holes` during the loop below. */
+ ulen = MIN(s_nblk << inode->i_sb->s_blocksize_bits,
+ inode->i_size - ext2_cluster_offset(inode, cluster));
+ r_nblk = (((ulen - 1) >> inode->i_sb->s_blocksize_bits) + 1);
+ if (r_nblk <= 1) {
+ /* MW: required to remove Z flag, otherwise compress
+ * is tried on each access */
+ result = 0;
+ goto no_compress;
+ }
+ /* Verify if more than 1 block to compress in the cluster */
+ nbh = 0;
+ for (x = 0; x < s_nblk; x++) {
+ if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) {
+ nbh++;
+ } else {
+ last_hole_pos = x;
+ head->holemap[x >> 3] |= 1 << (x & 7);
+ ulen -= inode->i_sb->s_blocksize;
+ /* impl: We know that it's a whole block because
+ ext2_get_cluster_blocks trims s_nblk for trailing
+ NULL blocks, and partial blocks only come at
+ the end, so there can't be partial NULL blocks. */
+ }
+ }
+	/* We don't try to compress clusters that have only one block
+	   or no block at all.  (When fragments are implemented, this code
+	   should be changed.) */
+ if (nbh <= 1) {
+ /* MW: required to remove Z flag, otherwise compress
+ * is tried on each access */
+ goto no_compress;
+ }
+
+ u_nblk = nbh;
+ /* Copy the data in the compression area */
+ dst = __get_cpu_var(ext2_wr_wa)->u;
+ for (x = 0; x < s_nblk; x++) {
+ if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) {
+ restore_b_data_himem(bh[x]);
+ memcpy(dst, bh[x]->b_data, bh[x]->b_size);
+ dst += bh[x]->b_size;
+ }
+ }
+
+ assert(count_bits(head->holemap, 4) == s_nblk - u_nblk);
+
+#if EXT2_GRAIN_SIZE != EXT2_MIN_BLOCK_SIZE
+# error "this code ought to be changed"
+#endif
+
+ /* ,maxlen` is the maximum length that the compressed data can
+ be while still taking up fewer blocks on disk. */
+ holemap_nbytes = (last_hole_pos >> 3) + 1;
+ /* impl: Remember that ,last_hole_pos` starts off as being -1,
+ so the high 3 bits of ,last_hole_pos >> 3` can be wrong.
+ This doesn't matter if holemap_nbytes discards the high
+ bits. */
+
+ assert(sizeof(holemap_nbytes) < sizeof(unsigned));
+ assert((last_hole_pos == (unsigned) -1)
+ == (holemap_nbytes == 0));
+	maxlen = (MIN(r_nblk, u_nblk) - 1) * inode->i_sb->s_blocksize
+	    - sizeof(struct ext2_cluster_head) - holemap_nbytes;
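+	/* aside: i.e. the compressed stream (header + holemap + payload)
+	 * must fit in strictly fewer blocks than the uncompressed data
+	 * occupies; e.g. with 1024-byte blocks and u_nblk = 4,
+	 * maxlen = 3 * 1024 - sizeof(struct ext2_cluster_head)
+	 * - holemap_nbytes. */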
+ clen = 0;
+ /* Handling of EXT2_AUTO_METH at the moment is just that we
+ use the kernel default algorithm. I hope that in future
+ this can be extended to the kernel deciding when to
+ compress and what algorithm to use, based on available disk
+ space, CPU time, algorithms currently used by the fs,
+ etc. */
+ if ((meth == EXT2_AUTO_METH)
+ || !ext2_algorithm_table[alg].avail) {
+ meth = EXT2_DEFAULT_COMPR_METHOD;
+ alg = ext2_method_table[meth].alg;
+ assert(ext2_algorithm_table[alg].avail);
+ }
+ if (alg == EXT2_NONE_ALG)
+ goto no_compress;
+
+ clen = ext2_algorithm_table[alg].compress(__get_cpu_var(ext2_wr_wa)->u,
+ __get_cpu_var(ext2_wr_wa)->c + sizeof(struct ext2_cluster_head) + holemap_nbytes,
+ __get_cpu_var(ext2_wr_wa)->heap, ulen, maxlen, ext2_method_table[meth].xarg);
+
+#ifdef EXT2_COMPR_REPORT_ALGORITHMS
+ printk(KERN_DEBUG "03 ext2: %lu: cluster %d+%d [%d] compressed "
+ "into %d bytes (ulen = %d, maxlen = %d)\n",
+ inode->i_ino,
+ ext2_cluster_offset(inode, cluster),
+ ext2_cluster_nblocks(inode, cluster),
+ u_nblk, clen, ulen, maxlen);
+#endif
+
+ if ((clen == 0) || (clen > maxlen)) {
+ no_compress:
+
+ /* this chunk didn't compress. */
+ assert(inode->i_size == saved_isize);
+#ifdef EXT2_COMPR_REPORT_WA
+ printk(KERN_DEBUG
+ "pid %d leaves critical region, nbh=%d, u_nblk=%d, "
+ "inode->i_size=%lu, saved_isize=%lu, clen=%d, ulen=%d, maxlen=%d\n",
+ current->pid, nbh, u_nblk,
+ (long unsigned) inode->i_size, saved_isize, clen, ulen,
+ maxlen);
+#endif
+
+ result = 0;
+ put_cpu_var(ext2_wr_wa);
+ goto done;
+ }
+
+
+#if EXT2_MAX_CLUSTER_BLOCKS > 32
+# error "We need to zero more bits than this."
+#endif
+ assert(-1 <= (int) last_hole_pos);
+ assert((int) last_hole_pos < 32);
+ assert((le32_to_cpu(*(u32 *) head->holemap)
+ & (~0u << (1 + last_hole_pos))
+ & (~(~0u << (8 * holemap_nbytes))))
+ == 0);
+ /* Don't change "~0u << (1 + last_hole_pos)" to "~1u << last_hole_pos"
+ as I almost did, as last_hole_pos can be -1 and cannot be 32. */
+ assert(count_bits(head->holemap, holemap_nbytes) == s_nblk - u_nblk);
+
+ /* Compress the blocks at the beginning of the cluster */
+ for (x = 0, nbh = 0; x < s_nblk; x++) {
+ if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) {
+ if (nbh != x) {
+ restore_b_data_himem(bh[x]);
+ bh[nbh]->b_blocknr = bh[x]->b_blocknr;
+ set_bit(BH_Mapped, &bh[nbh]->b_state);
+ bh[x]->b_blocknr = 0;
+ assert(buffer_mapped(bh[x]));
+ clear_bit(BH_Mapped, &bh[x]->b_state);
+ }
+ nbh++;
+ }
+ }
+ assert(nbh == u_nblk);
+ assert(count_bits(head->holemap, holemap_nbytes) == s_nblk - u_nblk);
+
+ /*
+ * Compression was successful, so add the header and copy to blocks.
+ */
+
+ /* Header. */
+ {
+ head->magic = cpu_to_le16(EXT2_COMPRESS_MAGIC_04X);
+ head->method = meth;
+ head->holemap_nbytes = holemap_nbytes;
+ head->ulen = cpu_to_le32(ulen);
+ head->clen = cpu_to_le32(clen);
+
+		barrier();	/* mw: barrier() keeps the compiler from reordering
+				 * the header stores above past the checksum
+				 * computation below. */
+ head->checksum =
+ cpu_to_le32(ext2_adler32
+ (le32_to_cpu(*(u32 *) __get_cpu_var(ext2_wr_wa)->c),
+ __get_cpu_var(ext2_wr_wa)->c + 8,
+ (sizeof(struct ext2_cluster_head) - 8 +
+ head->holemap_nbytes + clen)));
+ }
+
+ assert((le32_to_cpu(*(u32 *) head->holemap)
+ & (~0 << (1 + last_hole_pos))
+ & ((1 << (8 * holemap_nbytes)) - 1)) == 0);
+ result = clen += sizeof(struct ext2_cluster_head) + holemap_nbytes;
+ c_nblk = ROUNDUP_RSHIFT(clen, inode->i_sb->s_blocksize_bits);
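+	/* aside: worked example: with 1024-byte blocks, clen = 2600 (header +
+	 * holemap + compressed payload) gives
+	 * c_nblk = ROUNDUP_RSHIFT(2600, 10) = 3 blocks on disk. */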
+
+ /* Release unneeded buffer heads. (Freeing is done later,
+ after unlocking ext2_wr_wa.) */
+ assert(nbh == u_nblk);
+ nbh = c_nblk;
+
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c("ext2_compress_cluster: head->clen=%d, clen=%d\n", head->clen, clen);
+#endif
+ src = __get_cpu_var(ext2_wr_wa)->c;
+
+ for (n = 0; (int) clen > 0; n++) {
+ restore_b_data_himem(bh[n]);
+ if (clen >= inode->i_sb->s_blocksize) {
+ memcpy(bh[n]->b_data, src, inode->i_sb->s_blocksize);
+ } else {
+ memcpy(bh[n]->b_data, src, clen);
+ }
+
+		/* TO_DO: OSYNC.  Means: write operations block until the
+		 * pages are written from the page cache to disk */
+
+ set_buffer_uptodate(bh[n]);
+ set_buffer_dirty(bh[n]);
+ src += inode->i_sb->s_blocksize;
+ clen -= inode->i_sb->s_blocksize;
+ }
+
+ i = 0;
+ assert(n == c_nblk);
+ assert((le32_to_cpu(*(u32 *) head->holemap)
+ & (~0 << (1 + last_hole_pos))
+ & ((1 << (8 * holemap_nbytes)) - 1)) == 0);
+
+ /* Runtime check that no-one can change i_size while i_sem is down.
+ (See where saved_isize is set, above.) */
+ assert(inode->i_size == saved_isize);
+ assert(!mapping_mapped(inode->i_mapping));
+
+ /* Free the remaining blocks, and shuffle used blocks to start
+ of cluster in blkaddr array. */
+ {
+ u32 free_ix, curr;
+ int err;
+
+ /* Calculate free_ix. There should be ,c_nblk`
+ non-hole blocks among the first ,free_ix`
+ blocks. */
+ {
+ assert((le32_to_cpu(*(u32 *) head->holemap)
+ & (~0 << (1 + last_hole_pos))
+ & ((1 << (8 * holemap_nbytes)) - 1)) == 0);
+ assert(n == c_nblk);
+ for (free_ix = 0;
+ ((int) free_ix <= (int) last_hole_pos) && (n > 0);
+ free_ix++)
+ if (!(head->holemap[free_ix >> 3]
+ & (1 << (free_ix & 7))))
+ n--;
+ free_ix += n;
+
+ if ((free_ix < c_nblk)
+ || (free_ix + u_nblk > s_nblk + c_nblk)
+ || (free_ix >= ext2_cluster_nblocks(inode, cluster))
+ || ((holemap_nbytes == 0) && (c_nblk != free_ix))) {
+ assert(free_ix >= c_nblk);
+ /*assert (free_ix - c_nblk <= s_nblk - u_nblk); */
+ assert(free_ix + u_nblk <= s_nblk + c_nblk);
+ assert(free_ix < ext2_cluster_nblocks(inode, cluster));
+ assert((holemap_nbytes != 0) || (c_nblk == free_ix));
+ assert(1 <= c_nblk);
+ assert(c_nblk < u_nblk);
+ assert(u_nblk <= s_nblk);
+ assert(s_nblk <= ext2_cluster_nblocks(inode, cluster));
+ assert(ext2_cluster_nblocks(inode, cluster) <=
+ EXT2_MAX_CLU_NBLOCKS);
+ ext2_error(inode->i_sb, "ext2_compress_cluster",
+ "re assertions: c=%d, u=%d, f=%d, s=%d, n=%d, "
+ "lhp=%d, hm=%x, hnb=%d, " "ino=%lu, clu=%u",
+ (int) c_nblk, (int) u_nblk, (int) free_ix,
+ (int) s_nblk, (int) ext2_cluster_nblocks(inode,
+ cluster),
+ (int) last_hole_pos,
+ (unsigned) le32_to_cpu(*(u32 *) head->holemap),
+ (int) holemap_nbytes, inode->i_ino, cluster);
+ }
+ }
+
+ /*mw: put here: set all __get_cpu related pointers to NULL
+ as they become invalid with put_cpu */
+ head = NULL; /* prevent any more stupid bugs */
+ src = NULL;
+ dst = NULL;
+ put_cpu_var(ext2_wr_wa);
+
+#ifdef EXT2_COMPR_DEBUG
+ /* TODO: remove this TEST */
+ /* mw: ext2_free_cluster_blocks can sleep: check we are not atomic */
+ schedule();
+#endif
+
+ /* Free unneeded blocks, and mark cluster as
+ compressed. */
+ err = ext2_free_cluster_blocks
+ (inode,
+ ext2_cluster_block0(inode, cluster) + free_ix,
+ ext2_cluster_nblocks(inode, cluster) - free_ix);
+ /* pjm 1998-06-15: This should help reduce fragmentation.
+ Actually, we could set block to clu_block0 + clu_nbytes,
+ and goal to the last allocated blkaddr in the compressed
+ cluster.
+ It would be nice if we would transfer the freed blocks
+ to preallocation, while we're at it. */
+// write_lock(&ei->i_meta_lock);
+	/* mw: i_next_alloc_goal and i_next_alloc_block were removed in 2.6.24.x,
+	 * so we don't need to set them to 0 (they are anyway, somehow).
+	 */
+ //ei->i_next_alloc_goal = ei->i_next_alloc_block = 0;
+// write_unlock(&ei->i_meta_lock);
+ if (err < 0) {
+ goto done;
+ }
+ /* Note that ext2_free_cluster_blocks() marks the
+ cluster as compressed. */
+
+ /* Shuffle used blocks to beginning of block-number array. */
+ {
+ struct ext2_bkey key;
+ unsigned i;
+
+ if (!ext2_get_key(&key,
+ inode,
+ ext2_cluster_block0(inode, cluster))) {
+ ei->i_flags |= EXT2_ECOMPR_FL;
+ result = -EIO;
+ free_ix = 0;
+ }
+ for (i = 0; i < free_ix; i++) {
+ curr = ext2_get_key_blkaddr(&key);
+
+ if ((c_nblk == free_ix)
+ && (curr != bh[i]->b_blocknr)) {
+ /* "Can't happen", yet has
+ happened a couple of times. */
+ ext2_error(inode->i_sb, "ext2_compress_cluster",
+ "c_nblk=free_ix=%d, "
+ "curr=%u, b_blocknr=%lu, "
+ "lhp=%d , hm=<noinfo>, "
+ "ino=%lu, blk=%u",
+ c_nblk, curr,
+ (unsigned long) bh[i]->b_blocknr,
+ (int) last_hole_pos,
+ /*mw: became invalid due put_cpu:
+ (unsigned) le32_to_cpu(*(u32 *) head->
+ holemap),*/
+ inode->i_ino,
+ (unsigned)
+ ext2_cluster_block0(inode, cluster) + i);
+ }
+ err = ext2_set_key_blkaddr(&key,
+ (i < c_nblk
+ ? bh[i]->b_blocknr
+ : EXT2_COMPRESSED_BLKADDR));
+ if (err)
+ break;
+ if (!ext2_next_key(&key, 1)) {
+ ei->i_flags |= EXT2_ECOMPR_FL; /* sorry... */
+ result = -EIO;
+ break;
+ }
+ }
+ ext2_free_key(&key);
+ }
+ }
+
+ /*
+ * Unlock the working area.
+ */
+
+#ifdef EXT2_COMPR_REPORT_WA
+ printk(KERN_DEBUG "pid %d leaves critical region\n", current->pid);
+#endif
+
+ assert(c_nblk < u_nblk);
+ ext2_mark_algorithm_use(inode, alg);
+
+ /* TLL update b_assoc_map per 2.6.20 6-07-07 */
+ for (i = 0; i < c_nblk; i++)
+ if (bh[i] != NULL) {
+ bh[i]->b_assoc_map = inode->i_mapping;
+ bh[i]->b_page->mapping = inode->i_mapping; //Andreas 5-24-07 : necessary? WRONG?
+ }
+	//mw: we must force the writeback, otherwise ext2_readpage will get confused.
+	//    Yabo Ding had similar code above, but I think it makes more sense after
+	//    the block shuffling.
+	// Note: generic_osync_inode() made trouble with USB sticks and caused a lot
+	//    of IO and stalled the system, therefore ll_rw_block() replaced it.  Anyway,
+	//    we already operate with this low-level function.
+
+	/*mw: new "hole" fix.  hole == bdev bug! */
+	for (i = 0; i < c_nblk; i++) {
+
+		/* This was a hole (uncompressed) at the beginning
+		 * of the cluster, so NO block was yet associated
+		 * with it.  But now we need one, because a compressed
+		 * cluster always starts at the first block of the
+		 * cluster. */
+ if (!buffer_mapped(bh[i]) || bh[i]->b_bdev == NULL) {
+ u32 block = ext2_cluster_block0(inode, cluster);
+ ext2_get_block(inode, block + i, bh[i], 1);
+ //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh[i]->b_blocknr, (bh[i]->b_state & BH_Mapped), (bh[i]->b_page ? bh[i]->b_page->index : 0), bh[i]->b_bdev );
+ }
+ assert(buffer_mapped(bh[i]));
+ assert(bh[i]->b_bdev != NULL);
+ assert(bh[i]->b_bdev == inode->i_sb->s_bdev);
+ }
+
+ ll_rw_block(WRITE, c_nblk, bh);
+
+ CHECK_NOT_ATOMIC
+ //mw: seems we have to wait here, otherwise: crash!
+ for (i = 0; i < c_nblk; i++) {
+ if (bh[i])
+ wait_on_buffer(bh[i]);
+ //printk("written compressed block: Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh[i]->b_blocknr, (bh[i]->b_state & BH_Mapped), (bh[i]->b_page ? bh[i]->b_page->index : 0), bh[i]->b_bdev );
+ }
+
+
+#ifdef CONFIG_HIGHMEM
+ if (kmapped)
+ ext2_kunmap_cluster_pages(NULL, pg, NULL);
+#endif
+
+ inode->i_ctime = CURRENT_TIME; //mw: these two come always together. So I also put it here.
+ mark_inode_dirty_sync(inode);
+
+ //ext2_update_inode(inode, inode_needs_sync(inode)); //mw: might be able to fix pipe_write vs. readpage. mutex-rec-locking
+
+ /* COMPRBLK is already high, so no need to raise it. */
+ {
+ for (i = c_nblk; (i < EXT2_MAX_CLUSTER_BLOCKS) && (bh[i] != NULL);
+ i++) {
+ clear_buffer_dirty(bh[i]);
+ bh[i]->b_blocknr = 0;
+ clear_bit(BH_Mapped, &bh[i]->b_state);
+ clear_bit(BH_Uptodate, &bh[i]->b_state);
+ }
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (pg[i] == NULL) {
+ break;
+ }
+ assert(PageLocked(pg[i]));
+ ClearPageUptodate(pg[i]);
+ unlock_page(pg[i]);
+ page_cache_release(pg[i]);
+ }
+
+		/* invalidate_inode_buffers replacement code: TLL 02/21/07
+		 * e2compr on post-2.6.10 kernels does not have an up-to-date
+		 * mapping->assoc_mapping (other VM changes require that it be
+		 * made explicit; 2.4 kernels have it implicit).  Therefore, when
+		 * umount is called, a GPF ensues from a NULL ops pointer.
+		 * e2c on a USB thumbdrive mounted as the root fs does not
+		 * support repeated compress/uncompress cycles on a given file.
+		 * Inlined the flush-list code to explicitly force the update to
+		 * disk with a known-valid bh list.
+		 */
+
+ /* mw: I consider this code as ... not so good! */
+ /*
+ if (inode_has_buffers(inode)) {
+ //struct address_space *mapping = &inode->i_data;
+ // struct address_space *buffer_mapping = mapping->assoc_mapping;
+ // requires: inode->i_data->mapping->assoc_mapping; to be set
+ invalidate_inode_buffers(inode); // TLL do it proper 5-25-07
+ //if (dotrunc)
+ //ext2_truncate(inode); // TLL file size hack 6-19-07
+ }
+ */
+
+ }
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c(" < < < ext2_compress_cluster %i: [done cpr] inode=%ld\n", cluster, inode->i_ino);
+#endif
+ return result;
+
+
+ done:
+
+#ifdef CONFIG_HIGHMEM
+ if (kmapped)
+ ext2_kunmap_cluster_pages(NULL, pg, NULL);
+#endif
+
+ {
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (pg[i] == NULL) {
+ break;
+ }
+ unlock_page(pg[i]);
+ page_cache_release(pg[i]);
+ }
+ /* TLL cp to compr dir bug fix 03-25-07
+ Truncate uncompressed files to their uncompressed
+ length, i.e. force kernel to update inode and sb */
+ //if(dotrunc)
+ //26.08.2011: ext2_truncate(inode) does not exist anymore
+ ext2_truncate_blocks(inode, inode->i_size);
+
+ }
+#ifdef EXT2_COMPR_REPORT_VERBOSE
+ {
+ int i;
+
+ printk(KERN_DEBUG "ext2_compress_cluster[end]: buffers kept for cluster=%d\n", cluster);
+ for (i = 0; i < nbh; i++) {
+ if (bh[i]) {
+ printk(KERN_DEBUG "2buffer_head[%d]: blocknr=%lu, addr=0x%p ", i, (unsigned long) bh[i]->b_blocknr, bh[i]);
+ if (bh[i]->b_page)
+ printk(KERN_DEBUG "2:[page->index=%ld]\n", bh[i]->b_page->index);
+ else
+ printk(KERN_DEBUG "[No page]\n");
+ } else
+ printk(KERN_DEBUG "buffer_head[%d] is NULL\n", i);
+ }
+ }
+#endif
+
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c(" < < < ext2_compress_cluster %i: [done NO cpr] inode=%ld\n", cluster, inode->i_ino);
+#endif
+ return result;
+}
+
+
+/* Go through all the clusters and compress them if not already
+ compressed.
+
+ This is called by ext2_put_inode() and ext2_release_file(). Later,
+ we may have ext2_ioctl() call it (when EXT2_COMPR_FL rises). None
+ of the callers does any locking, so we do it here.
+
+ Neither of the current callers uses the return code, but we get ready
+ for if we start using it.
+
+ Returns 0 on "success" (whether or not we cleared EXT2_CLEANUP_FL
+ or EXT2_DIRTY_FL bits), -errno on error. */
+int ext2_cleanup_compressed_inode(struct inode *inode)
+{
+ u32 cluster;
+ u32 n_clusters;
+ int dirty = 0;
+ int err = 0;
+ u32 comprblk_mask;
+ atomic_t start_i_count = inode->i_count;
+ int retry = 0;
+ int have_downed;
+ struct ext2_inode_info *ei = EXT2_I(inode);
+#ifdef EXT2_COMPR_REPORT
+ char bdn[BDEVNAME_SIZE];
+#endif
+
+ /* impl: Actually, this assertion could fail if the kernel
+ isn't locked. I haven't looked, but I suppose that the
+ kernel always is locked when this is called. */
+ assert(ei->i_compr_flags & EXT2_CLEANUP_FL);
+
+#ifdef EXT2_COMPR_REPORT_PUT
+ printk(KERN_DEBUG "ext2_cleanup_compressed_inode() called for pid %d; "
+ "dev=%s, ino=%lu, i_state=0x%lx, i_count=%u\n",
+ current->pid, bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino,
+ inode->i_state, atomic_read(&inode->i_count));
+#endif
+
+ /* Do these tests twice: once before down() and once after. */
+ for (have_downed = 0;; have_downed++) {
+ if ((ei->i_flags & (EXT2_COMPR_FL | EXT2_DIRTY_FL))
+ != (EXT2_COMPR_FL | EXT2_DIRTY_FL)) {
+ if (have_downed)
+ goto out;
+ /* TLL 5-25-07 changed from a warning to trace */
+ /*trace_e2c("ext2_cleanup_compressed_inode: trying to un/compress an "
+ "uncompressable file.\n"
+ "i_flags=%#x. (dev=%s, ino=%lu, down=%d)\n",
+ ei->i_flags, bdevname(inode->i_sb->s_bdev, bdn),
+ inode->i_ino, have_downed); */
+ return 0;
+ }
+
+ /* test if file is mapped by mmap */
+ if (mapping_mapped(inode->i_mapping))
+ {
+ //trace_e2c("ext2_cleanup_compressed_inode: (dev. %s): ino=%ld: file mapped, does not compress cluster\n", bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino);
+ if (have_downed)
+ goto out;
+ else
+ return 0;
+ }
+
+ if (IS_RDONLY(inode)
+ || (ei->i_flags & EXT2_ECOMPR_FL)) {
+ ei->i_compr_flags &= ~EXT2_CLEANUP_FL;
+ if (have_downed)
+ goto out;
+ else
+ return 0;
+ }
+
+ //mw
+ if (ext2_get_dcount(inode) > 1) {
+ err = 0;
+ //printk("Compress: file busy (dcount: %i>1)\n", ext2_get_dcount(inode));
+ if (have_downed)
+ goto out;
+ else
+ return 0;
+ }
+
+ if (have_downed)
+ break;
+
+ /* Quotas aren't otherwise kept if file is opened O_RDONLY. */
+ dquot_initialize(inode);
+
+		/* Check whether OSYNC of the inode is actually running */
+ //if (ei->i_compr_flags & EXT2_OSYNC_INODE)
+ //printk(KERN_DEBUG "OSYNC!\n");
+
+		/* I think:
+		 * checking these flags should prevent one process from acquiring the
+		 * MUTEX again, e.g. in a recursive call.
+		 * BUT what actually happens is that two processes work on this inode:
+		 * pdflush and the user program.
+		 * SO: the check might be correct if ei->i_compr_flags & EXT2_OSYNC_INODE
+		 * AND the same process already possesses this lock!!!
+		 */
+ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE))
+ //{
+ mutex_lock(&inode->i_mutex);
+#ifdef EXT2_COMPR_REPORT_MUTEX
+ printk(KERN_DEBUG "CLEANUP_LOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino);
+#endif
+ //}
+ }
+ n_clusters = ext2_n_clusters(inode);
+
+#ifdef EXT2_COMPR_REPORT_PUT
+ printk(KERN_DEBUG "ext2: inode:%lu: put compressed, clusters = %d, flags = %x, pid = %u\n",
+ inode->i_ino, n_clusters, ei->i_flags, current->pid);
+#endif
+
+ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */
+
+ /* Try to compress the clusters. We clear EXT2_DIRTY_FL only
+ if we looked at every cluster and if there was no error. */
+
+ /* impl: We raise EXT2_COMPRBLK_FL now so that ext2_ioctl()
+ doesn't try to change the cluster size beneath us. If need
+ be, we restore the bit to its original setting before
+ returning. Note that no-one else can _change_
+ EXT2_COMPRBLK_FL while we work because i_sem is down. */
+ /* impl: Note what's happening here with comprblk_mask. The
+ current state of COMPRBLK_FL (before we start) is that
+ (comprblk == 1) || (no compressed clusters). At the end of
+ the procedure, comprblk == one if (at least one compressed
+ cluster, or an error occurred preventing us from finding
+ out). */
+ comprblk_mask = ~EXT2_COMPRBLK_FL | ei->i_flags;
+ ei->i_flags |= EXT2_COMPRBLK_FL;
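+	/* aside: worked example of the mask logic.  If COMPRBLK was already
+	 * set, comprblk_mask == ~0ul and the `ei->i_flags &= comprblk_mask`
+	 * at the end is a no-op.  If it was clear, comprblk_mask ==
+	 * ~EXT2_COMPRBLK_FL, so the bit raised here is stripped again (when
+	 * not retrying) unless some cluster really compressed, which sets
+	 * comprblk_mask = ~0ul. */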
+
+ for (cluster = 0; cluster < n_clusters; cluster++) {
+ if (atomic_read(&inode->i_count) > atomic_read(&start_i_count)) {
+			/* This is a poor way of doing this (and doubly
+			   poor now that the only users of i_count are
+			   the dentries), but the idea is not to
+			   compress things that are likely to be
+			   decompressed soon.  I guess a better way of
+			   doing this would be just to make sure that
+			   the stuff is in the page cache. */
+ retry = 1;
+ break;
+ }
+ err = ext2_cluster_is_compressed_fn(inode, cluster);
+ if (err == 0) {
+ //mw: ext2_compress_cluster might clean EXT2_COMPRBLK_FL, therefore raise it for every new cluster
+ ei->i_flags |= EXT2_COMPRBLK_FL;
+
+ err = ext2_compress_cluster(inode, cluster);
+ if (err < 0)
+ dirty = 1;
+ else if (err > 0)
+ comprblk_mask = ~0ul;
+ } else if (err < 0)
+ break;
+ else {
+ err = 0;
+ assert(comprblk_mask == ~0ul); /* i.e. that EXT2_COMPRBLK_FL was high. */
+ }
+ }
+
+ if ((cluster >= n_clusters) && !dirty)
+ ei->i_flags &= ~EXT2_DIRTY_FL;
+ if (!retry) {
+ ei->i_compr_flags &= ~EXT2_CLEANUP_FL;
+ ei->i_flags &= comprblk_mask;
+ }
+
+ /* We clear EXT2_CLEANUP_FL because, otherwise, we'll get
+ called again almost immediately. */
+
+ /*
+ * The CLEANUP flag *MUST* be cleared, otherwise the iput routine
+ * calls ext2_put_inode() again (because i_dirt is set) and there
+ * is a loop. The control scheme (CLEANUP + DIRTY flags) could
+ * probably be improved. On the other hand, i_dirt MUST be set
+	 * because we may have slept, and we must force the iput routine
+ * to look again at the i_count ...
+ */
+ /* TODO: Have a look at this cleanup scheme. The above
+ comment sounds wrong. */
+
+ inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+ out:
+
+#ifdef EXT2_COMPR_REPORT_MUTEX
+ printk(KERN_DEBUG "CLEANUP_UNLOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino);
+#endif
+
+// if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { /* MW 5-16-07 */
+ mutex_unlock(&inode->i_mutex);
+// } /* MW 5-16-07 */
+ return err; /* TODO: Check that ,err` is appropriate. */
+}
+
+
+int ext2_recognize_compressed(struct inode *inode, unsigned cluster)
+{
+ /* ext2_recognize_compressed(): Check tht the cluster is valid
+ in every way, and then do the EXT2_COMPRESSED_BLKADDR
+ thing. */
+ /* nyi, fixme. All of the userspace stuff (EXT2_NOCOMPR_FL
+ etc.) needs work, so I might as well leave this. See
+ ioctl.c for a description of what it's supposed to do. */
+ return -ENOSYS;
+}
+
+
+/* Look for compressed clusters. If none, then clear EXT2_COMPRBLK_FL.
+
+ Called by:
+ ext2_truncate().
+ */
+void ext2_update_comprblk(struct inode *inode)
+{
+ unsigned block, last_block;
+ struct ext2_bkey key;
+ struct ext2_inode_info *ei = EXT2_I(inode);
+
+ assert(ei->i_flags & EXT2_COMPRBLK_FL);
+ if (inode->i_size == 0) {
+ ei->i_flags &= ~EXT2_COMPRBLK_FL;
+ trace_e2c("ext2_update_comprblk 1: inode: %lu removed EXT2_COMPRBLK_FL!\n", inode->i_ino);
+ return;
+ }
+ last_block = ROUNDUP_RSHIFT(inode->i_size,
+ inode->i_sb->s_blocksize_bits) - 1;
+ block = ext2_first_cluster_nblocks(inode) - 1;
+
+ assert(atomic_read(&inode->i_mutex.count) <= 0);
+
+ if (!ext2_get_key(&key, inode, block))
+ return;
+ for (;;) {
+ if (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR)
+ goto out;
+ if (block >= last_block)
+ goto clear;
+ if (!ext2_next_key(&key, ei->i_clu_nblocks))
+ goto out;
+ block += ei->i_clu_nblocks;
+ }
+ clear:
+ trace_e2c("ext2_update_comprblk 2: inode: %lu removed EXT2_COMPRBLK_FL!\n", inode->i_ino);
+ ei->i_flags &= ~EXT2_COMPRBLK_FL;
+ out:
+ ext2_free_key(&key);
+ assert(atomic_read(&inode->i_mutex.count) <= 0);
+
+}
+
+
+/*
+ * allocate working areas
+ */
+
+DEFINE_PER_CPU(struct ext2_wa_S *, ext2_rd_wa) = NULL;
+DEFINE_PER_CPU(struct ext2_wa_S *, ext2_wr_wa) = NULL;
+
+/* SMP, setup wa's. caller must hold wa already via get_cpu_var */
+void ext2_alloc_rd_wa(void)
+{
+ if ((__get_cpu_var(ext2_rd_wa) == NULL) ) {
+ size_t rsize = 2 * EXT2_MAX_CLUSTER_BYTES; //mw: just guessing
+
+ __get_cpu_var(ext2_rd_wa) = vmalloc (rsize);
+ if (__get_cpu_var(ext2_rd_wa) == NULL)
+			printk (KERN_ERR "EXT2-fs: can't allocate working area; compression turned off.\n");
+ else {
+ printk ("ext2-compression: allocated read buffer for CPU%i at %p-%p (%zu bytes)\n",
+ get_cpu(), __get_cpu_var(ext2_rd_wa), (char *)__get_cpu_var(ext2_rd_wa) + rsize, rsize);
+# ifdef EXT2_COMPR_REPORT_WA
+			printk (KERN_INFO "EXT2-fs: rd_wa=%p--%p (%zu)\n",
+				__get_cpu_var(ext2_rd_wa),
+				(char *)__get_cpu_var(ext2_rd_wa) + rsize, rsize);
+# endif
+ put_cpu();
+ }
+ }
+}
+
+void ext2_alloc_wr_wa(void)
+{
+
+ if ((__get_cpu_var(ext2_wr_wa) == NULL) ) {
+ size_t wsize = 2 * EXT2_MAX_CLUSTER_BYTES; //mw: just guessing
+ __get_cpu_var(ext2_wr_wa) = vmalloc (wsize);
+
+ if (__get_cpu_var(ext2_wr_wa) == NULL)
+			printk (KERN_ERR "EXT2-fs: can't allocate working area; "
+				"compression turned off.\n");
+ else {
+ printk ("ext2-compression: allocated write buffer for CPU%i at %p-%p (%zu bytes)\n",
+ get_cpu(), __get_cpu_var(ext2_wr_wa), (char *)__get_cpu_var(ext2_wr_wa) + wsize, wsize);
+#ifdef EXT2_COMPR_REPORT_WA
+			printk (KERN_INFO "EXT2-fs: wr_wa=%p--%p (%zu)\n",
+				__get_cpu_var(ext2_wr_wa),
+				(char *)__get_cpu_var(ext2_wr_wa) + wsize, wsize);
+#endif
+ put_cpu();
+ }
+ }
+}
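+
+/* aside: a minimal sketch (not built) of the per-CPU working-area discipline
+ * used by the call sites above: pin the CPU with get_cpu_var(), allocate
+ * lazily, and do not schedule until put_cpu_var().  The function name below
+ * is illustrative only.
+ */
+#if 0
+static void sketch_use_rd_wa(void)
+{
+	if (get_cpu_var(ext2_rd_wa) == NULL)
+		ext2_alloc_rd_wa();
+	if (__get_cpu_var(ext2_rd_wa) != NULL) {
+		/* ... decompress into __get_cpu_var(ext2_rd_wa)->u ... */
+	}
+	put_cpu_var(ext2_rd_wa);
+}
+#endif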
+
+
--- linux-3.2-rc5/fs/ext2/e2zlib.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/e2zlib.c 2011-12-13 14:22:47.841975843 +0100
@@ -0,0 +1,74 @@
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs_c.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/zlib.h>
+#include <linux/vmalloc.h>
+
+static DEFINE_PER_CPU(struct crypto_comp *, tfm) = NULL;
+
+size_t ext2_iZLIB(int action)
+{
+ /*mw: we init tfm when we need it...*/
+ return 0;
+}
+
+
+size_t ext2_wZLIB(__u8 * ibuf, __u8 * obuf, void *heap,
+ size_t ilen, size_t olen, int level)
+{
+ int ret, dlen;
+
+ if (!try_module_get(THIS_MODULE))
+ return 0;
+
+ /*check if we already have a tfm*/
+ get_cpu_var(tfm);
+ if (__get_cpu_var(tfm) == NULL){
+ __get_cpu_var(tfm) = crypto_alloc_comp("deflate", 0, CRYPTO_ALG_ASYNC);
+ }
+ assert(__get_cpu_var(tfm) != NULL);
+
+ dlen = olen;
+ ret = crypto_comp_compress(__get_cpu_var(tfm) , ibuf, ilen, obuf, &dlen);
+
+ put_cpu_var(tfm);
+
+ if (ret) {
+ //printk(KERN_DEBUG "ext2_wZLIB: crypto_comp_compress failed: %d, ilen: %d, olen: %d\n", ret, ilen, olen);
+ return 0;
+ }
+ return dlen;
+}
+
+
+size_t ext2_rZLIB(__u8 * ibuf, __u8 * obuf, void *heap,
+ size_t ilen, size_t olen, int ignored)
+{
+ int ret, dlen;
+
+ if (!try_module_get(THIS_MODULE))
+ return 0;
+
+ /*check if we already have a tfm*/
+ get_cpu_var(tfm);
+ if (__get_cpu_var(tfm) == NULL){
+ __get_cpu_var(tfm) = crypto_alloc_comp("deflate", 0, CRYPTO_ALG_ASYNC);
+ }
+ assert(__get_cpu_var(tfm) != NULL);
+
+ dlen = olen;
+ ret = crypto_comp_decompress(__get_cpu_var(tfm), ibuf, ilen, obuf, &dlen);
+
+ put_cpu_var(tfm);
+
+ if (ret) {
+		//printk(KERN_DEBUG "ext2_rZLIB: crypto_comp_decompress failed: %d, ilen: %d, olen: %d\n", ret, ilen, olen);
+ return 0;
+ }
+
+ return dlen;
+}
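+
+/* aside: the per-CPU tfm above is allocated lazily and never freed.  A
+ * teardown path could release it along these lines; a sketch only (not
+ * built), the function name is illustrative:
+ */
+#if 0
+static void sketch_free_tfms(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct crypto_comp *t = per_cpu(tfm, cpu);
+
+		if (t != NULL) {
+			crypto_free_comp(t);
+			per_cpu(tfm, cpu) = NULL;
+		}
+	}
+}
+#endif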
--- linux-3.2-rc5/fs/ext2/adler32.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/adler32.c 2011-12-13 14:22:47.841975844 +0100
@@ -0,0 +1,43 @@
+/* adler32.c -- compute the Adler-32 checksum of a data stream
+ * Copyright (C) 1995-1998 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id: e2compr2.6.25.patch,v 1.1.2.1 2008/04/17 09:49:32 winkler Exp $ */
+
+#define BASE 65521L /* largest prime smaller than 65536 */
+#define NMAX 5552
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+#define DO1(buf,i) {s1 += buf[i]; s2 += s1;}
+#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf) DO8(buf,0); DO8(buf,8);
+
+/* ========================================================================= */
+unsigned long ext2_adler32(unsigned long adler, const unsigned char *buf, unsigned int len)
+{
+ unsigned long s1 = adler & 0xffff;
+ unsigned long s2 = (adler >> 16) & 0xffff;
+ int k;
+
+ if (buf == 0) return 1L;
+
+ while (len > 0) {
+ k = len < NMAX ? len : NMAX;
+ len -= k;
+ while (k >= 16) {
+ DO16(buf);
+ buf += 16;
+ k -= 16;
+ }
+ if (k != 0) do {
+ s1 += *buf++;
+ s2 += s1;
+ } while (--k);
+ s1 %= BASE;
+ s2 %= BASE;
+ }
+ return (s2 << 16) | s1;
+}
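+
+/* aside: worked example: ext2_adler32(1L, "abc", 3) runs
+ *   s1: 1 -> 98 -> 196 -> 295   (running byte sum, mod BASE)
+ *   s2: 0 -> 98 -> 294 -> 589   (running sum of s1, mod BASE)
+ * and returns (589 << 16) | 295 == 0x024d0127, the standard Adler-32
+ * of "abc".
+ */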
--- linux-3.2-rc5/fs/ext2/super.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/super.c 2011-12-13 14:22:47.843975906 +0100
@@ -32,7 +32,12 @@
#include <linux/log2.h>
#include <linux/quotaops.h>
#include <asm/uaccess.h>
+#ifdef CONFIG_EXT2_COMPRESS
+#include <linux/vmalloc.h>
+#include <linux/ext2_fs_c.h>
+#else
#include "ext2.h"
+#endif
#include "xattr.h"
#include "acl.h"
#include "xip.h"
@@ -393,7 +398,11 @@ enum {
Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic,
Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
- Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
+ Opt_acl, Opt_noacl,
+#ifdef CONFIG_EXT2_COMPRESS
+ Opt_force_compat,
+#endif
+ Opt_xip, Opt_ignore, Opt_err, Opt_quota,
Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
};
@@ -426,6 +435,9 @@ static const match_table_t tokens = {
{Opt_ignore, "noquota"},
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
+#ifdef CONFIG_EXT2_COMPRESS
+ {Opt_force_compat, "force-compat"},
+#endif
{Opt_reservation, "reservation"},
{Opt_noreservation, "noreservation"},
{Opt_err, NULL}
@@ -569,6 +581,11 @@ static int parse_options(char *options,
clear_opt(sbi->s_mount_opt, RESERVATION);
ext2_msg(sb, KERN_INFO, "reservations OFF");
break;
+#ifdef CONFIG_EXT2_COMPRESS
+ case Opt_force_compat:
+ set_opt(sbi->s_mount_opt, FORCE_COMPAT);
+ break;
+#endif
case Opt_ignore:
break;
default:
@@ -585,6 +602,10 @@ static int ext2_setup_super (struct supe
int res = 0;
struct ext2_sb_info *sbi = EXT2_SB(sb);
+#ifdef CONFIG_EXT2_COMPRESS
+ printk (KERN_INFO E2COMPR_VERSION "\n");
+#endif
+
if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) {
ext2_msg(sb, KERN_ERR,
"error: revision level too high, "
@@ -876,6 +897,65 @@ static int ext2_fill_super(struct super_
le32_to_cpu(features));
goto failed_mount;
}
+#ifdef CONFIG_EXT2_COMPRESS
+ /* Check that required algorithms are available. */
+ /* todo: Provide a mount option to override this. */
+ /*
+ * Philosophical bug: we assume that an algorithm's
+ * module is available if and only if this kernel was
+ * compiled with that algorithm as a module. This is
+ * untrue, but it is unclear what the right thing to
+ * do is.
+ */
+ j = 0; /* error flag */
+ if ((es->s_feature_incompat
+ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION))
+ && (es->s_algorithm_usage_bitmap
+ & ~cpu_to_le32(EXT2_ALGORITHMS_SUPP))) {
+ /*
+ * The filesystem employs an algorithm not
+ * supported by this filesystem. Issue warning or
+ * error.
+ */
+ for (i = 0; i < 32; i++) {
+ if (!(es->s_algorithm_usage_bitmap
+ & cpu_to_le32(1 << i))
+ || ((EXT2_ALGORITHMS_SUPP
+ & (1 << i))))
+ continue;
+ /*
+ * TODO: Can't this message be moved outside
+ * of the for loop?
+ */
+ if (!j) {
+ if (test_opt(sb, FORCE_COMPAT))
+ printk(KERN_WARNING
+ "EXT2-fs: %s: "
+ "uses unsupported "
+ "compression algorithms",
+ sb->s_id);
+ else
+ printk("EXT2-fs: %s: couldn't mount "
+ "because of unsupported "
+ "compression algorithms",
+ sb->s_id);
+ j = 1;
+ }
+ if (i < EXT2_N_ALGORITHMS)
+ printk(" %s", ext2_algorithm_table[i].name);
+ else
+ printk(" %u", i);
+ }
+ }
+ if (j) {
+ if (test_opt(sb, FORCE_COMPAT))
+ printk(" but ignoring as you request.\n");
+ else {
+ printk(".\n");
+ goto failed_mount;
+ }
+ }
+#endif /* CONFIG_EXT2_COMPRESS */
if (!(sb->s_flags & MS_RDONLY) &&
(features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of "
--- linux-3.2-rc5/fs/ext2/ialloc.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/ialloc.c 2011-12-13 14:22:47.845975968 +0100
@@ -470,6 +470,9 @@ struct inode *ext2_new_inode(struct inod
brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, group);
if (!bitmap_bh) {
+#ifdef CONFIG_EXT2_COMPRESS
+ EXT2_I(inode)->i_flags &= ~EXT2_COMPR_FL;
+#endif
err = -EIO;
goto fail;
}
@@ -558,6 +561,17 @@ got:
memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_flags =
ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED);
+#ifdef CONFIG_EXT2_COMPRESS
+ /*
+ * The EXT2_COMPR flag is inherited from the parent
+ * directory as well as the cluster size and the compression
+ * algorithm.
+ */
+ ei->i_log2_clu_nblocks = EXT2_I(dir)->i_log2_clu_nblocks;
+ ei->i_clu_nblocks = EXT2_I(dir)->i_clu_nblocks;
+ ei->i_compr_method = EXT2_I(dir)->i_compr_method;
+ ei->i_compr_flags = 0;
+#endif
ei->i_faddr = 0;
ei->i_frag_no = 0;
ei->i_frag_size = 0;
--- linux-3.2-rc5/fs/ext2/balloc.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/balloc.c 2011-12-13 14:22:47.847976031 +0100
@@ -11,8 +11,13 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
+#ifdef CONFIG_EXT2_COMPRESS
+#include <linux/quotaops.h>
+#include <linux/ext2_fs_c.h>
+#else
#include "ext2.h"
#include <linux/quotaops.h>
+#endif
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/buffer_head.h>
@@ -499,6 +504,13 @@ void ext2_free_blocks (struct inode * in
struct ext2_super_block * es = sbi->s_es;
unsigned freed = 0, group_freed;
+
+#ifdef CONFIG_EXT2_COMPRESS
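+	/* EXT2_COMPRESSED_BLKADDR is the sentinel stored in the block pointers
+	   of compressed clusters; it must never be passed to ext2_free_blocks(). */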
+ assert((block != EXT2_COMPRESSED_BLKADDR)
+ || !S_ISREG(inode->i_mode)
+ || !(EXT2_SB(sb)->s_es->s_feature_incompat
+ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)));
+#endif
if (block < le32_to_cpu(es->s_first_data_block) ||
block + count < block ||
block + count > le32_to_cpu(es->s_blocks_count)) {
--- linux-3.2-rc5/fs/ext2/inode.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/inode.c 2011-12-13 14:22:47.852976189 +0100
@@ -32,7 +32,14 @@
#include <linux/mpage.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
+#ifdef CONFIG_EXT2_COMPRESS
+#include <linux/kmod.h>
+#include <linux/ext2_fs_c.h>
+#include <linux/spinlock.h>
+#include <linux/pagevec.h>
+#else
#include "ext2.h"
+#endif
#include "acl.h"
#include "xip.h"
@@ -40,6 +47,34 @@ MODULE_AUTHOR("Remy Card and others");
MODULE_DESCRIPTION("Second Extended Filesystem");
MODULE_LICENSE("GPL");
+#ifdef CONFIG_EXT2_COMPRESS
+/* mw: this function counts all dentry references
+ * to this inode. this is necessary to
+ * refuse un/compression if the file has
+ * more than one reference, I guess. */
+int ext2_get_dcount(struct inode *inode)
+{
+ struct dentry *dentry;
+ struct list_head *head, *next, *tmp;
+ int count;
+
+ head = &inode->i_dentry;
+ next = inode->i_dentry.next;
+ count = 0;
+ while (next != head) {
+ dentry = list_entry(next, struct dentry, d_alias);
+ tmp = next;
+ next = tmp->next;
+ spin_lock(&dentry->d_lock);
+ count += dentry->d_count;
+ spin_unlock(&dentry->d_lock);
+ //mw: similar to fs/dcache.c
+ }
+
+ return count;
+}
+#endif
+
static int __ext2_write_inode(struct inode *inode, int do_sync);
/*
@@ -54,7 +89,9 @@ static inline int ext2_inode_is_fast_sym
inode->i_blocks - ea_blocks == 0);
}
+#ifndef CONFIG_EXT2_COMPRESS
static void ext2_truncate_blocks(struct inode *inode, loff_t offset);
+#endif
static void ext2_write_failed(struct address_space *mapping, loff_t to)
{
@@ -240,7 +277,11 @@ static Indirect *ext2_get_branch(struct
*err = 0;
/* i_data is not going away, no lock needed */
add_chain (chain, NULL, EXT2_I(inode)->i_data + *offsets);
+#ifdef CONFIG_EXT2_COMPRESS
+ if (HOLE_BLKADDR(p->key))
+#else
if (!p->key)
+#endif
goto no_block;
while (--depth) {
bh = sb_bread(sb, le32_to_cpu(p->key));
@@ -251,7 +292,11 @@ static Indirect *ext2_get_branch(struct
goto changed;
add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
read_unlock(&EXT2_I(inode)->i_meta_lock);
+#ifdef CONFIG_EXT2_COMPRESS
+ if (HOLE_BLKADDR(p->key))
+#else
if (!p->key)
+#endif
goto no_block;
}
return NULL;
@@ -297,7 +342,11 @@ static ext2_fsblk_t ext2_find_near(struc
/* Try to find previous block */
for (p = ind->p - 1; p >= start; p--)
+#ifdef CONFIG_EXT2_COMPRESS
+ if (!HOLE_BLKADDR(*p))
+#else
if (*p)
+#endif
return le32_to_cpu(*p);
/* No such thing, so let's try location of indirect block */
@@ -498,7 +547,13 @@ static int ext2_alloc_branch(struct inod
*/
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
branch[n].bh = bh;
+#ifndef CONFIG_EXT2_COMPRESS
lock_buffer(bh);
+#else
+ CHECK_NOT_ATOMIC
+ if (!buffer_uptodate(bh))
+ wait_on_buffer(bh);
+#endif
memset(bh->b_data, 0, blocksize);
branch[n].p = (__le32 *) bh->b_data + offsets[n];
branch[n].key = cpu_to_le32(new_blocks[n]);
@@ -514,7 +569,9 @@ static int ext2_alloc_branch(struct inod
*(branch[n].p + i) = cpu_to_le32(++current_block);
}
set_buffer_uptodate(bh);
+#ifndef CONFIG_EXT2_COMPRESS
unlock_buffer(bh);
+#endif
mark_buffer_dirty_inode(bh, inode);
/* We used to sync bh here if IS_SYNC(inode).
* But we now rely upon generic_write_sync()
@@ -675,6 +732,7 @@ static int ext2_get_blocks(struct inode
if (err == -EAGAIN || !verify_chain(chain, partial)) {
while (partial > chain) {
brelse(partial->bh);
+// bforget(partial->bh); /*mw: e2c-pre-2.6.30.4 used bforget here*/
partial--;
}
partial = ext2_get_branch(inode, depth, offsets, chain, &err);
@@ -766,21 +824,608 @@ int ext2_fiemap(struct inode *inode, str
ext2_get_block);
}
+#ifdef CONFIG_EXT2_COMPRESS
+/*
+ * Readpage method that will take care of decompression.
+ */
+/* effic: I (pjm) think that at present, reading a 32KB cluster 4KB at
+ a time does `decompress 4KB' for the first 4KB, then `decompress
+ 8KB' for the second, and so on. See if we can provide the page
+ cache with all the pages in a cluster. The problem is, we don't
+   want to erase anything that hasn't been written to disk, so we can't
+ just call update_vm_cache(). The plan at present is to remember
+ what the contents of ext2_rd_wa.u come from, and don't bother
+ decompressing anything if the working area already contains the
+ right data. However, this is only a win where adjacent calls to
+ ext2_decompress_blocks() request the same cluster. We could force
+ that by copying some code from generic_file_read() (but check for
+ deadlocks before doing anything like that), but instead I'm taking
+ the more passive approach of hoping for the best. */
+static int ext2_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct page *pg[EXT2_MAX_CLUSTER_PAGES], *epg[EXT2_MAX_CLUSTER_PAGES];
+ u32 cluster0, max_cluster;
+ int i, blockOfCluster, blocksToDo, npg;
+ const int inc = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
+ struct ext2_inode_info *ei = EXT2_I(page->mapping->host);
+#ifdef CONFIG_HIGHMEM
+ int kmapped = 0; //mw
+#endif
+
+ int iClusterCnt;
+
+ /* For directories, fall out through default routine */
+ if (S_ISDIR(inode->i_mode))
+ {
+ int rc;
+
+ rc = block_read_full_page(page,ext2_get_block);
+ assert(!rc);
+ return rc;
+ }
+
+ /* The semaphore prevents us trying to compress and decompress
+ the cluster at the same time, or compress a cluster in the
+ middle of reading it (thinking it to be uncompressed).
+
+ You may not like the fact that we hold the semaphore across
+ readpage (given that it isn't held without e2compr compiled
+ in), but it does guarantee that we won't compress the
+ cluster during readpage. (OTOH, it's unlikely, if not
+	   impossible, for someone to `compress a cluster and rewrite
+	   the blocks' before the readpage completes.) */
+ /* This procedure used to have `#ifndef EXT2_LOCK_BUFFERS'
+ around all the semaphore stuff, and unlocked each buffer
+ before brelsing them ifdef EXT2_LOCK_BUFFERS. I (pjm,
+ 1998-01-20) have removed that because (a) EXT2_LOCK_BUFFERS
+ isn't #defined anywhere, and doesn't appear outside of this
+ function, and (b) I haven't looked at what effect locking
+ the buffers has. You may like to reintroduce the idea of
+ buffer locking to this function if you're more familiar
+ with buffer locking than I, and believe that the full i_sem
+ isn't necessary to protect from races (people seeing raw
+ compressed data) between readpage and ext2_file_write(),
+ ext2_compress_cluster() and ext2_truncate(). */
+ unlock_page(page);
+ mutex_lock(&inode->i_mutex);
+
+ assert (atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */
+
+	//mw: added EXT2_COMPR_FL, because EXT2_COMPRBLK_FL might change without mutex !!!
+ if ( !(ei->i_flags & (EXT2_COMPRBLK_FL|EXT2_COMPR_FL))
+ || (ei->i_flags & EXT2_NOCOMPR_FL) )
+ {
+ goto readpage_uncompressed;
+ }
+
+ {
+ register u32 blockOfFile
+ = (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits;
+
+ blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
+ cluster0 = ext2_block_to_cluster(inode, blockOfFile);
+ max_cluster = ext2_block_to_cluster
+ (inode, blockOfFile + blocksToDo - 1);
+ blockOfCluster
+ = blockOfFile - ext2_cluster_block0(inode, cluster0);
+ }
+
+ /* return -???, any idea which code. do_generic_file_read() cares, ext2_readpages() doesn't.
+ maybe I should look at the "generic" readpage() and see what it returns in this case */
+
+ /* Check if any part of the requested area contains part of a
+ compressed cluster. If not, we can use default ext2_readpage().
+
+ (Note that we don't have to worry about a cluster becoming
+ compressed in the meantime, because we have the semaphore.)
+
+ A page can cover up to 9 clusters. (The maximum can only
+ occur with 32KB pages, 4KB clusters, and a non-page-aligned
+ offset. Thanks go to Kurt Fitzner for reporting that
+ page offsets needn't be aligned; see generic_file_mmap().) */
+ {
+ int isCmp[(PAGE_SIZE >> 12) + 1];
+ u8 *dst;
+ unsigned clu_ix;
+
+ assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp));
+ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) {
+ isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix);
+ if (isCmp[clu_ix] < 0){
+ printk("IO-ERROR: isCmp\n");
+ goto io_error;
+ }
+ }
+
+ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++)
+ if (isCmp[clu_ix] > 0)
+ goto readpage_compressed;
+ /* fall through */
+ readpage_uncompressed:
+ {
+ int rc=0;
+ lock_page(page);
+
+ /* Did somebody else fill it already? */
+ if (PageUptodate(page) ){ //mw: necessary for DEBUG! anyway checked in do_generic_mapping_read
+ unlock_page(page);
+ }
+ else {
+ //try_to_free_buffers(page);
+ rc = block_read_full_page(page,ext2_get_block);
+ }
+ mutex_unlock(&inode->i_mutex);
+ assert(!rc);
+ return rc;
+ }
+
+ readpage_compressed:
+
+ /* Copied from block_read_full_page */
+ /* if (!PageLocked(page)) */
+ /* PAGE_BUG(page); */
+ lock_page(page);
+ if (PageUptodate(page)) {
+ unlock_page(page);
+ mutex_unlock(&inode->i_mutex);
+ return(0);
+ }
+ get_page(page);
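+	/* extra reference for the duration of the decompression; dropped again
+	   via atomic_dec(&page->_count) below */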
+
+ ClearPageUptodate(page);
+ ClearPageError(page);
+
+ dst = (u8 *) page_address(page);
+ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) {
+ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS];
+ int nbh, blocksThisClu;
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ pg[i] = NULL;
+ epg[i] = NULL;
+ }
+
+ /* clear_bit(PG_locked, &page->flags); */
+ npg = ext2_cluster_npages(inode, cluster0 + clu_ix);
+ nbh = ext2_get_cluster_pages(inode, cluster0 + clu_ix, pg, page, 0);
+
+		if (nbh <= 0) {
+			printk("no pages\n");
+			goto out;
+		}
+ iClusterCnt = ext2_cluster_npages(inode, cluster0);
+
+ nbh = ext2_get_cluster_extra_pages(inode, cluster0 + clu_ix, pg, epg);
+ if (nbh <= 0)
+ {
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++)
+ epg[i] = NULL;
+ printk("no extra pages\n");
+ goto out;
+ }
+		assert (iClusterCnt == ext2_cluster_npages(inode, cluster0));
+
+#ifdef CONFIG_HIGHMEM
+ ext2_kmap_cluster_pages(page, pg, epg);
+ kmapped = 1;
+#endif
+
+ nbh = ext2_get_cluster_blocks(inode, cluster0 + clu_ix, bh, pg, epg, 0);
+ if (nbh <= 0)
+ {
+ printk("no blocks\n");
+ goto out;
+ }
+
+ /* How many blocks (including holes) we need from this cluster. */
+ {
+ blocksThisClu = (ext2_cluster_nblocks(inode, cluster0 +
+ clu_ix) - blockOfCluster);
+ if (blocksThisClu > blocksToDo)
+ blocksThisClu = blocksToDo;
+ }
+
+ if (isCmp[clu_ix]) {
+ u8 const *src;
+ int n, nbytes_wanted;
+ struct ext2_cluster_head *head;
+ unsigned meth;
+# ifdef CONFIG_KMOD
+ unsigned alg;
+# endif
+
+ bh[0]->b_data = page_address(bh[0]->b_page);
+ head = (struct ext2_cluster_head *) bh[0]->b_data;
+
+ /* jmr 1998-10-28 Hope this is the last time I'm moving this code.
+ * Module loading must be done _before_ we lock wa, just think what
+ * can happen if we reallocate wa when somebody else uses it...
+ */
+ meth = head->method; /* only a byte, so no swabbing needed. */
+ if (meth >= EXT2_N_METHODS) {
+ printk("illegal method id\n");
+ ext2_msg(inode->i_sb,
+ "illegal method id",
+ "inode = %lu, id = %u",
+ inode->i_ino, meth);
+ goto out;
+ }
+# ifdef CONFIG_KMOD
+ alg = ext2_method_table[meth].alg;
+ if (!ext2_algorithm_table[alg].avail) {
+ char str[32];
+
+ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name);
+ request_module(str);
+ }
+# endif /* CONFIG_KMOD */
+
+ /* Calculate nbytes_wanted. */
+ {
+ unsigned nblk_wanted, i;
+
+ /* We want to decompress the whole cluster */
+ //nblk_wanted = ext2_cluster_nblocks(inode, cluster0 + clu_ix);
+ nblk_wanted = npg << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); /*mw: FIXED */
+
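+				/* subtract the blocks recorded as holes in the
+				   cluster head's holemap: they are not stored
+				   on disk */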
+ for (i = nblk_wanted; i != 0;)
+ if (((--i >> 3) < head->holemap_nbytes)
+ && (head->holemap[i >> 3] & (1 << (i & 7))))
+ --nblk_wanted;
+ nbytes_wanted = (nblk_wanted
+ << inode->i_sb->s_blocksize_bits);
+ }
+
+ /* Decompress. */
+ get_cpu_var(ext2_rd_wa);
+ if (__get_cpu_var(ext2_rd_wa) == NULL)
+ {
+ ext2_alloc_rd_wa();
+ }
+ assert(__get_cpu_var(ext2_rd_wa) != NULL);
+
+ n = ext2_decompress_blocks(inode, bh, nbh, nbytes_wanted, cluster0 + clu_ix);
+ if (n < 0) {
+ assert(nbh >= 0);
+ printk("ext2_readpage: noblocks decompressed\n");
+ put_cpu_var(ext2_rd_wa);
+ goto out;
+ }
+
+# ifdef EXT2_COMPR_REPORT_VERBOSE_INODE
+ if (ei->i_flags & EXT2_COMPR_FL)
+ printk(KERN_DEBUG "ext2: mmap %04x:%lu: blocksToDo=%d, blockOfCluster=%d, blocksThisClu=%d, clu_nblocks=%d\n",
+ inode->i_rdev,
+ inode->i_ino,
+ blocksToDo,
+ blockOfCluster,
+ blocksThisClu,
+ ext2_cluster_nblocks(inode, cluster0 + clu_ix));
+# endif
+
+ /* */
+ {
+ unsigned i;
+ int ipg;
+
+ i = ext2_cluster_nblocks(inode, cluster0 + clu_ix) - 1;
+ //i = (npg << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - 1; /*mw: FIXED!!! (here: shift = 2Bit) */
+ //if(i+1 != ext2_cluster_nblocks(inode, cluster0 + clu_ix))
+ //printk("npg=%i, nbh=%i, npgf=%i, nbhf =%i, cluster:%i, dec_blk:%i, b_wanted:%i, size:%i\n ", ext2_cluster_npages(inode, cluster0 + clu_ix), ext2_cluster_nblocks(inode, cluster0 + clu_ix), npgtest, i+1, cluster0 + clu_ix, n, nbytes_wanted, inode->i_size);
+ blockOfCluster = 0;
+ assert(n > 0);
+ src = __get_cpu_var(ext2_rd_wa)->u + nbytes_wanted - inode->i_sb->s_blocksize;
+#ifdef EXT2_COMPR_REPORT
+ trace_e2c("ext2_readpage: copy data inc=%d blocksThisClu=%d, n=%d\n", inc, blocksThisClu, n);
+#endif
+ for (ipg = npg - 1; ipg >= 0; ipg--) {
+ if (pg[ipg] == NULL) {
+ i -= inc;
+ src -= PAGE_SIZE;
+ continue;
+ }
+ if (((inode->i_size-1) >> PAGE_SHIFT) == pg[ipg]->index) {
+ n = ((inode->i_size-1) & (PAGE_SIZE -1)) >> inode->i_sb->s_blocksize_bits;
+ i -= ((blocksThisClu-1) - n);
+ src -= ((blocksThisClu-1) - n) << inode->i_sb->s_blocksize_bits;
+ } else {
+ n = blocksThisClu - 1;
+ }
+ if (PageUptodate(pg[ipg]) ) {
+ for (;n >= 0;n--, i--) {
+ if (((i >> 3) >= head->holemap_nbytes)
+ || !(head->holemap[i >> 3] & (1 << (i & 7)))) {
+ src -= inode->i_sb->s_blocksize;
+ }
+ }
+ } else {
+
+ dst = (u8 *) page_address(pg[ipg]) + (n << inode->i_sb->s_blocksize_bits);
+
+ for (;
+ n >= 0;
+ n--, i--, dst -= inode->i_sb->s_blocksize) {
+ assert(!buffer_dirty(bh[i]));
+ clear_buffer_dirty(bh[i]); //mw: had a refile_buffer in 2.4
+ if (((i >> 3) >= head->holemap_nbytes)
+ || !(head->holemap[i >> 3] & (1 << (i & 7)))) {
+ assert(i >= 0);
+ memcpy(dst, src, inode->i_sb->s_blocksize);
+ src -= inode->i_sb->s_blocksize;
+ } else {
+ assert(i >= 0);
+ memset (dst, 0, inode->i_sb->s_blocksize);
+ }
+ //clear_bit(BH_Uptodate, &bh[i]->b_state);
+ }
+ SetPageUptodate(pg[ipg]);
+ }
+ }
+ }
+ put_cpu_var(ext2_rd_wa);
+ } else {
+ /* Uncompressed cluster. Just copy the data. */
+ int n;
+
+# ifdef EXT2_COMPR_REPORT_VERBOSE_INODE
+ if (ei->i_flags & EXT2_COMPR_FL)
+ printk(KERN_DEBUG
+ "ext2: mmap %lu: blocksToDo = %d, "
+ "blockOfCluster = %d, clu_nblocks = %d\n",
+ inode->i_ino, blocksToDo, blockOfCluster,
+ ext2_cluster_nblocks(inode, cluster0 +
+ clu_ix));
+# endif
+
+ for (n = 0;
+ n < blocksThisClu;
+ n++, dst += inode->i_sb->s_blocksize) {
+ if ((blockOfCluster + n < nbh)
+ && (bh[blockOfCluster + n] != NULL))
+ {
+ memcpy(dst,
+ bh[blockOfCluster + n]->b_data,
+ inode->i_sb->s_blocksize);
+ }
+ else
+ {
+ memset(dst, 0, inode->i_sb->s_blocksize);
+ }
+ }
+ blockOfCluster = 0;
+ } // end uncompressed Cluster
+
+ blocksToDo -= blocksThisClu;
+
+#ifdef CONFIG_HIGHMEM
+ if (kmapped)
+ ext2_kunmap_cluster_pages(page, pg, epg);
+#endif
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (epg[i] != NULL) {
+
+ ClearPageDirty(epg[i]);
+ ClearPageUptodate(epg[i]);
+ try_to_free_buffers(epg[i]);
+ unlock_page(epg[i]);
+ assert(page_count(epg[i]) <= 1);
+ page_cache_release(epg[i]);
+ }
+ }
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (pg[i] == NULL)
+ break;
+ if (pg[i] == page)
+ continue;
+ unlock_page(pg[i]);
+ page_cache_release(pg[i]);
+ }
+ //mw
+ assert (isCmp[clu_ix] == ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix));
+ } // end for-loop: Cluster
+ }
+
+ SetPageUptodate(page);
+ unlock_page(page);
+ atomic_dec(&page->_count);
+ mutex_unlock(&inode->i_mutex);
+ return 0;
+
+ out:
+
+#ifdef CONFIG_HIGHMEM
+ if (kmapped)
+ ext2_kunmap_cluster_pages(page, pg, epg);
+#endif
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (epg[i] != NULL) {
+
+ ClearPageDirty(epg[i]);
+ ClearPageUptodate(epg[i]);
+ try_to_free_buffers(epg[i]);
+ unlock_page(epg[i]);
+ assert(page_count(epg[i]) <= 1);
+ page_cache_release(epg[i]);
+ }
+ }
+
+ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) {
+ if (pg[i] == NULL)
+ break;
+ if (pg[i] == page)
+ continue;
+ unlock_page(pg[i]);
+ page_cache_release(pg[i]);
+ }
+ mutex_unlock(&inode->i_mutex);
+ return 0;
+
+ io_error:
+#ifdef CONFIG_HIGHMEM
+ if (kmapped)
+ ext2_kunmap_cluster_pages(page, pg, epg);
+#endif
+ SetPageError(page);
+ unlock_page(page);
+ atomic_dec(&page->_count);
+ mutex_unlock(&inode->i_mutex);
+ printk("Readpage: IOERROR\n");
+ return -EIO; /* it is tested in do_generic_file_read(), ... */
+}
+#endif /* CONFIG_EXT2_COMPRESS */
+
static int ext2_writepage(struct page *page, struct writeback_control *wbc)
{
+/* mw (24/06/2008):
+ * WRITEPAGE: this code was also in e2compr 2.4 and was once removed by yaboo ding.
+ * ext2_writepage() is also called for dirty pages. Usually we write using file_write(), which
+ * handles compressed files correctly. BUT: a writable memory map might
+ * produce dirty pages, which will be written back via this path; that should/might fail.
+ * The following code should fix this bug, but it has not been tested yet.
+ */
+#ifdef CONFIG_EXT2_COMPRESS
+#undef USE_WRITEPAGE
+//#define USE_WRITEPAGE
+#ifdef USE_WRITEPAGE
+
+ struct ext2_inode_info *ei = EXT2_I(page->mapping->host);
+ int retval;
+
+ struct inode *inode = page->mapping->host;
+ u32 cluster0, max_cluster;
+ int blocksToDo;
+
+ unlock_page(page);
+ //mw: do we need this ???
+ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) {
+ /* trace_e2c("ext2_writepage: inode"); */
+ mutex_lock(&inode->i_mutex);
+ /* trace_e2c(" down\n"); */
+ //}
+ if (!(ei->i_flags & EXT2_COMPRBLK_FL)
+ || (ei->i_flags & EXT2_NOCOMPR_FL) )
+ {
+ //mw: do we need this ???
+ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) {
+ /* trace_e2c("ext2_writepage: inode up 1\n"); */
+ mutex_unlock(&inode->i_mutex);
+ //}
+ lock_page(page);
+ return block_write_full_page(page, ext2_get_block, wbc);
+ }
+ /* */
+ {
+ register u32 blockOfFile
+ = (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits;
+
+ blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
+ cluster0 = ext2_block_to_cluster(inode, blockOfFile);
+ max_cluster = ext2_block_to_cluster(inode, blockOfFile + blocksToDo - 1);
+ }
+
+ /* Check if any part of the requested area contains part of a
+ compressed cluster. If not, we can use default ext2_writepage().
+
+ (Note that we don't have to worry about a cluster becoming
+ compressed in the meantime, because we have the semaphore.)
+
+ A page can cover up to 9 clusters. (The maximum can only
+ occur with 32KB pages, 4KB clusters, and a non-page-aligned
+ offset. Thanks go to Kurt Fitzner for reporting that
+ page offsets needn't be aligned; see generic_file_mmap().) */
+
+ {
+ int isCmp[(PAGE_SIZE >> 12) + 1];
+ unsigned clu_ix;
+
+ assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp));
+ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) {
+ isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix);
+ if (isCmp[clu_ix] < 0) {
+ //mw: do we need this ???if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) {
+ /* trace_e2c("ext2_writepage: inode up 2\n"); */
+ lock_page(page);
+ mutex_unlock(&inode->i_mutex);
+ //}
+ return -EIO;
+ }
+ }
+
+ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++)
+ if (isCmp[clu_ix] > 0)
+ ext2_decompress_cluster(inode, cluster0 + clu_ix);
+
+ //mw: do we need this ???
+ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) {
+ /* trace_e2c("ext2_writepage: inode up 3\n"); */
+ mutex_unlock(&inode->i_mutex);
+ //}
+ lock_page(page);
+
+ /* fall through */
+ }
+#endif /* CONFIG_EXT2_COMPRESS */
+#endif
return block_write_full_page(page, ext2_get_block, wbc);
}
+#ifndef CONFIG_EXT2_COMPRESS
static int ext2_readpage(struct file *file, struct page *page)
{
return mpage_readpage(page, ext2_get_block);
}
+#endif
static int
ext2_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
+#ifdef CONFIG_EXT2_COMPRESS
+/*
+ * For now, just read each page into cache and don't worry about emitting BIOs.
+ * (whitpa 02 Aug 2004).
+ */
+
+ unsigned page_idx;
+ struct pagevec lru_pvec;
+ int iError;
+
+ pagevec_init(&lru_pvec, 0);
+
+ for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+ struct page *page = list_entry(pages->prev, struct page, lru);
+
+ prefetchw(&page->flags);
+ list_del(&page->lru);
+
+ iError = add_to_page_cache(page, mapping, page->index, GFP_KERNEL);
+ if (!iError) {
+ if (!PageUptodate(page))
+ {
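+				/* ext2_readpage() takes care of any decompression needed */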
+ (void) ext2_readpage(file, page);
+ }
+ else
+ {
+ unlock_page(page);
+ }
+ if (!pagevec_add(&lru_pvec, page))
+ __pagevec_lru_add_file(&lru_pvec);
+ } else {
+ page_cache_release(page);
+ }
+
+ }
+ pagevec_lru_add_file(&lru_pvec);
+ BUG_ON(!list_empty(pages));
+ return 0;
+#else
return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
+#endif
}
static int
@@ -829,11 +1474,58 @@ static int ext2_nobh_writepage(struct pa
return nobh_writepage(page, ext2_get_block, wbc);
}
+#ifdef CONFIG_EXT2_COMPRESS
+static sector_t ext2_do_bmap(struct address_space *mapping, sector_t block)
+#else
static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
+#endif
{
return generic_block_bmap(mapping,block,ext2_get_block);
}
+#ifdef CONFIG_EXT2_COMPRESS
+/* Return 0 instead of EXT2_COMPRESSED_BLKADDR if EXT2_NOCOMPR_FL
+ * high. This is necessary for us to be able to use
+ * generic_readpage() when EXT2_NOCOMPR_FL is high.
+ */
+static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
+{
+ sector_t result;
+ struct inode *inode = mapping->host;
+
+ if ((EXT2_I(inode)->i_flags & (EXT2_COMPRBLK_FL | EXT2_NOCOMPR_FL))
+ == (EXT2_COMPRBLK_FL | 0)) {
+ int err;
+
+ err = ext2_cluster_is_compressed_fn
+ (inode, ext2_block_to_cluster(inode, block));
+ if (err > 0)
+ ext2_msg (inode->i_sb, "ext2_bmap",
+ "compressed cluster, inode %lu",
+ inode->i_ino);
+ if (err != 0)
+ return 0;
+ }
+
+ result = ext2_do_bmap(mapping, block);
+ if (result != EXT2_COMPRESSED_BLKADDR)
+ return result;
+
+ if (!(EXT2_SB(inode->i_sb)->s_es->s_feature_incompat
+ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)))
+ ext2_error(inode->i_sb, "ext2_bmap",
+ "compressed_blkaddr (ino %lu, blk %lu) "
+ "on non-compressed fs",
+ inode->i_ino, (unsigned long) block);
+ if (!S_ISREG(inode->i_mode))
+ ext2_error(inode->i_sb, "ext2_bmap",
+ "compressed_blkaddr for non-regular file "
+ "(ino %lu, blk %lu)",
+ inode->i_ino, (unsigned long) block);
+ return 0;
+}
+#endif /* CONFIG_EXT2_COMPRESS */
+
static ssize_t
ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t offset, unsigned long nr_segs)
@@ -853,6 +1545,18 @@ ext2_direct_IO(int rw, struct kiocb *ioc
static int
ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
+#ifdef CONFIG_EXT2_COMPRESS
+#ifdef USE_WRITEPAGE
+ struct ext2_inode_info *ei = EXT2_I(mapping->host);
+ if ( (ei->i_flags & EXT2_COMPRBLK_FL)
+ && !(ei->i_flags & EXT2_NOCOMPR_FL))
+ {
+ //NULL will invoke ext2_writepage for writeback, hopefully.
+ return mpage_writepages(mapping, wbc, NULL);
+ }
+ else
+#endif
+#endif
return mpage_writepages(mapping, wbc, ext2_get_block);
}
@@ -1001,6 +1705,12 @@ static inline void ext2_free_data(struct
for ( ; p < q ; p++) {
nr = le32_to_cpu(*p);
+#ifdef CONFIG_EXT2_COMPRESS
+ if (nr == EXT2_COMPRESSED_BLKADDR) {
+ *p = 0;
+ continue;
+ }
+#endif
if (nr) {
*p = 0;
/* accumulate blocks to free if they're contiguous */
@@ -1045,6 +1755,12 @@ static void ext2_free_branches(struct in
nr = le32_to_cpu(*p);
if (!nr)
continue;
+#ifdef CONFIG_EXT2_COMPRESS
+ if (nr == EXT2_COMPRESSED_BLKADDR) {
+ *p = 0;
+ continue;
+ }
+#endif
*p = 0;
bh = sb_bread(inode->i_sb, nr);
/*
@@ -1069,6 +1785,96 @@ static void ext2_free_branches(struct in
ext2_free_data(inode, p, q);
}
+/* pjm 1998-01-14: As far as I can tell, "I don't do any locking" is
+ no longer correct, as i_sem is downed for all write() and
+ truncate() stuff except where it doesn't matter (e.g. new inode). */
+
+#ifdef CONFIG_EXT2_COMPRESS
+/* If the EXT2_ECOMPR_FL bit is high, then things can go rather badly.
+ This can only happen if access permission was obtained before the
+ flag was raised. Also, it shouldn't be too much of a problem
+ unless the end point of truncation is a compressed cluster with a
+ compression error. */
+
+ /* From what I (Antoine) understand, the complexity of the truncate
+ code is due to the fact that we don't want to free blocks that
+     are still referenced. It does not ensure that concurrent read
+     operations will terminate properly, i.e., the semantics of reading
+ while somebody truncates is undefined (you can either get the old
+ data if you got the blocks before, or get plenty of zeros
+ otherwise). */
+
+/* todo: Provide error trapping in readiness for when i_op->truncate
+ allows a return code. */
+static void fix_compression (struct inode * inode)
+{
+ struct ext2_inode_info *ei = EXT2_I(inode);
+ /*if (atomic_read(&inode->i_mutex.count) > 0)
+ {
+ printk("Assert Mutex failed for file: %s \n", inode_name(inode, 0));
+ dump_stack();
+ }*/
+
+ assert (ei->i_flags & EXT2_COMPRBLK_FL); /* one or more compressed clusters */
+ assert ((atomic_read(&inode->i_mutex.count) < 1)
+ || ((inode->i_nlink == 0)
+ && (atomic_read(&inode->i_count) == 0)));
+ /* pjm 1998-01-14: I think the below comment can safely be removed, as
+ it's impossible for someone to be compressing during truncate(), because
+ i_sem is down. */
+	/* If the clusters can be compressed, we'd have a problem: we'd
+	   also need to stop if the cluster is compressed and doesn't
+	   contain more data than i_size permits. Otherwise we can spend
+	   time decompressing a cluster that someone else is compressing
+	   at the same time... (TODO). This can only happen because we
+	   re-verify after the fact whether the cluster is uncompressed
+	   (which is what we currently do) => do it differently. */
+
+ /* todo: Handle errors from ext2_cluster_is_compressed().
+ (Except ext2_truncate() currently silently ignores errors
+ anyway.) */
+
+ if (!ext2_offset_is_clu_boundary(inode, inode->i_size)
+ && (! ( ei->i_flags & EXT2_NOCOMPR_FL))
+ && (ext2_cluster_is_compressed_fn
+ (inode, ext2_offset_to_cluster (inode, inode->i_size))
+ > 0)) {
+ trace_e2c("fix_compression: inode:%lu decompress_cluster!\n", inode->i_ino);
+ ext2_decompress_cluster(inode, ext2_offset_to_cluster(inode, inode->i_size));
+ /* todo: Check the return code of
+ ext2_decompress_cluster(). (Then again, I don't
+ know how to report an error anyway.
+ ext2_truncate() silently ignores errors.) */
+
+ /* Organise for the cluster to be recompressed later. */
+ assert (ei->i_flags & EXT2_COMPR_FL);
+
+ ei->i_flags |= EXT2_DIRTY_FL;
+ ei->i_compr_flags |= EXT2_CLEANUP_FL;
+ mark_inode_dirty(inode);
+ } else
+ /* If there are no more compressed clusters, then
+ remove the EXT2_COMPRBLK_FL. Not essential from a
+ safety point of view, but friendlier. We only do
+ this in the `else' because the cleanup function
+ will handle it in the `if' case. */
+ ext2_update_comprblk(inode);
+}
+#endif
+
+
static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
__le32 *i_data = EXT2_I(inode)->i_data;
@@ -1081,6 +1887,27 @@ static void __ext2_truncate_blocks(struc
int n;
long iblock;
unsigned blocksize;
+
+#ifdef CONFIG_EXT2_COMPRESS
+ /* If the new size is in the middle of a compressed cluster,
+ then we decompress it, and set things up to be recompressed
+ later.
+
+ todo: It isn't very nice to get ENOSPC on truncate. We
+ can't completely remove the possibility (unless the
+ compression algorithms obey the rule `shorter input never
+ gives longer output') but we could greatly reduce the
+ possibility, e.g. by moving the fix_compression() function
+ to compress.c, and have it decompress and immediately
+ recompress the cluster, without allocating blocks for the
+ full decompressed data. */
+ if (EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL) {
+ trace_e2c("ext2_truncate: ino=%ld sz=%d\n", inode->i_ino, (int)inode->i_size);
+ fix_compression(inode);
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ }
+#endif
+
blocksize = inode->i_sb->s_blocksize;
iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
@@ -1151,8 +1978,11 @@ do_indirects:
mutex_unlock(&ei->truncate_mutex);
}
-
+#ifdef CONFIG_EXT2_COMPRESS
+void ext2_truncate_blocks(struct inode *inode, loff_t offset)
+#else
static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
+#endif
{
/*
* XXX: it seems like a bug here that we don't allow
@@ -1340,7 +2170,73 @@ struct inode *ext2_iget (struct super_bl
goto bad_inode;
}
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
+#ifdef CONFIG_EXT2_COMPRESS
+ ei->i_flags = 0x807fffff & le32_to_cpu(raw_inode->i_flags);
+ ei->i_compr_flags = 0;
+ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) {
+
+		if (S_ISDIR(inode->i_mode))
+		{
+			//mw:
+			//mutex_lock(&inode->i_mutex);
+			ei->i_flags &= ~(EXT2_COMPRBLK_FL | EXT2_DIRTY_FL);	//modify!!!
+			//mutex_unlock(&inode->i_mutex);
+		}
+
+ /* The above shouldn't be necessary unless someone's
+ * been playing with EXT2_IOC_SETFLAGS on a non-e2compr
+ * kernel, or the inode has been scribbled on.
+ */
+ if (ei->i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL)) {
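+			/* e2compr packs the compression method into i_flags
+			   bits 26..30 and log2(cluster size in blocks) into
+			   bits 23..25 on disk */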
+ ei->i_compr_method
+ = (le32_to_cpu(raw_inode->i_flags) >> 26) & 0x1f;
+ ei->i_log2_clu_nblocks
+ = (le32_to_cpu(raw_inode->i_flags) >> 23) & 0x7;
+ if ((ei->i_log2_clu_nblocks < 2)
+ || (ei->i_log2_clu_nblocks > 5)) {
+ if ((ei->i_log2_clu_nblocks == 0)
+ && !(ei->i_flags & EXT2_COMPRBLK_FL)) {
+ /* The EXT2_COMPR_FL flag was
+ * raised under a kernel
+ * without e2compr support.
+ */
+ if (S_ISREG(inode->i_mode))
+ ei->i_flags |= EXT2_DIRTY_FL;
+ /* Todo: once we're sure the kernel can
+ * handle [log2_]clu_nblocks==0, get rid
+ * of the next statement.
+ */
+ ei->i_log2_clu_nblocks
+ = EXT2_DEFAULT_LOG2_CLU_NBLOCKS;
+ } else {
+ ei->i_flags |= EXT2_ECOMPR_FL;
+ ext2_error(inode->i_sb,
+ "ext2_read_inode",
+ "inode %lu is corrupted: "
+ "log2_clu_nblocks=%u",
+ inode->i_ino,
+ ei->i_log2_clu_nblocks);
+ }
+ }
+ } else {
+ ei->i_compr_method = EXT2_DEFAULT_COMPR_METHOD;
+ ei->i_log2_clu_nblocks
+ = EXT2_DEFAULT_LOG2_CLU_NBLOCKS;
+ }
+ if (ei->i_log2_clu_nblocks >
+ (EXT2_LOG2_MAX_CLUSTER_BYTES - inode->i_sb->s_blocksize_bits))
+ ei->i_log2_clu_nblocks = (EXT2_LOG2_MAX_CLUSTER_BYTES
+ - inode->i_sb->s_blocksize_bits);
+ ei->i_clu_nblocks = 1 << ei->i_log2_clu_nblocks;
+ if (ei->i_flags & EXT2_DIRTY_FL)
+ ei->i_compr_flags = EXT2_CLEANUP_FL;
+ }
+#else /* !CONFIG_EXT2_COMPRESS */
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
+#endif
ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
ei->i_frag_no = raw_inode->i_frag;
ei->i_frag_size = raw_inode->i_fsize;
@@ -1463,7 +2359,35 @@ static int __ext2_write_inode(struct ino
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
+#ifdef CONFIG_EXT2_COMPRESS
+ if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
+ && (ei->i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL))) {
+ if ((ei->i_log2_clu_nblocks < 2)
+ || (ei->i_log2_clu_nblocks > 5)) {
+ ei->i_flags |= EXT2_ECOMPR_FL;
+ ext2_error (inode->i_sb, "ext2_write_inode",
+ "inode %lu is corrupted: log2_clu_nblocks=%u",
+ inode->i_ino, ei->i_log2_clu_nblocks);
+ }
+ assert (ei->i_clu_nblocks == (1 << ei->i_log2_clu_nblocks));
+ assert (ei->i_compr_method < 0x20);
+ raw_inode->i_flags = cpu_to_le32
+ ((ei->i_flags & 0x807fffff)
+ | (ei->i_compr_method << 26)
+ | (ei->i_log2_clu_nblocks << 23));
+ } else
+ {
+ //mw: i_mutex was introduced and disabled again: deadlock with lilo
+ // mutex_lock(&inode->i_mutex); //mw
+ raw_inode->i_flags = cpu_to_le32 //modify !!!
+ (ei->i_flags
+ & 0x807fffff /* no compr meth/size */
+ & ~(EXT2_COMPR_FL | EXT2_COMPRBLK_FL | EXT2_IMMUTABLE_FL | EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL));
+ // mutex_unlock(&inode->i_mutex); //mw
+ }
+#else
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+#endif
raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
raw_inode->i_frag = ei->i_frag_no;
raw_inode->i_fsize = ei->i_frag_size;
--- linux-3.2-rc5/fs/ext2/file.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/file.c 2011-12-13 14:22:47.853976220 +0100
@@ -18,10 +18,25 @@
* (jj@sunsite.ms.mff.cuni.cz)
*/
+#ifdef CONFIG_EXT2_COMPRESS
+#include <linux/fs.h>
+#include <linux/ext2_fs_c.h>
+#include <linux/buffer_head.h>
+#include <asm/uaccess.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/writeback.h>
+#else
#include <linux/time.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include "ext2.h"
+#endif
+
+
#include "xattr.h"
#include "acl.h"
@@ -30,8 +45,39 @@
* for a single struct file are closed. Note that different open() calls
* for the same file yield different struct file structures.
*/
+
+/*
+ * pjm 1998-01-09: I would note that this is different from `when no
+ * process has the inode open'.
+ */
static int ext2_release_file (struct inode * inode, struct file * filp)
{
+#ifdef CONFIG_EXT2_COMPRESS
+ /*
+ * Now's as good a time as any to clean up wrt compression.
+ * Previously (before 2.1.4x) we waited until
+ * ext2_put_inode(), but now the dcache sometimes delays that
+ * call until umount time.
+ */
+ //printk(KERN_DEBUG "ext2_release_file: pid=%d, i_ino=%lu, i_count=%d\n", current->pid, inode->i_ino, atomic_read(&inode->i_count));
+
+ if (S_ISREG (inode->i_mode)
+ && inode->i_nlink
+ && (EXT2_I(inode)->i_compr_flags & EXT2_CLEANUP_FL)) {
+#ifdef EXT2_COMPR_REPORT_PUT
+ printk(KERN_DEBUG "ext2_release_file: pid=%d, i_ino=%lu, i_count=%d\n", current->pid, inode->i_ino, atomic_read(&inode->i_count));
+#endif
+ /*
+ * todo: See how the return code of
+ * ext2_release_file() is used, and decide whether it
+ * might be appropriate to pass any errors to
+ * caller.
+ */
+ //dump_stack();
+ (void) ext2_cleanup_compressed_inode (inode);
+ }
+
+#endif
if (filp->f_mode & FMODE_WRITE) {
mutex_lock(&EXT2_I(inode)->truncate_mutex);
ext2_discard_reservation(inode);
@@ -56,6 +102,456 @@ int ext2_fsync(struct file *file, loff_t
return ret;
}
+#ifdef CONFIG_EXT2_COMPRESS
+struct page_cluster {
+ struct page * page;
+ loff_t pos;
+ unsigned bytes;
+ unsigned long offset;
+ unsigned char in_range;
+ const char * buf;
+};
+
+#define PAGE_IN_RANGE 1
+#define PAGE_KMAPPED 2
+
+
+/**
+ * ex_generic_osync_inode - flush all dirty data for a given inode to disk
+ * @inode: inode to write
+ * @mapping: the address_space that should be flushed
+ *
+ * This can be called by file_write functions for files which have the
+ * O_SYNC flag set, to flush dirty writes to disk.
+ *
+ * The original generic_osync_inode() took a @what bitmask selecting which
+ * parts of the inode's data should be written and waited upon:
+ *
+ * OSYNC_DATA: i_mapping's dirty data
+ * OSYNC_METADATA: the buffers at i_mapping->private_list
+ * OSYNC_INODE: the inode itself
+ */
+
+/* mw: see generic_osync_inode() in kernel<2.6.30 for the original method.
+   basically we want all of it: OSYNC_DATA and OSYNC_METADATA and OSYNC_INODE */
+int ex_generic_osync_inode(struct inode *inode, struct address_space *mapping) //, int what)
+{
+ int err = 0;
+ int need_write_inode_now = 0;
+ int err2;
+
+ err = filemap_fdatawrite(mapping);
+
+ err2 = sync_mapping_buffers(mapping);
+ if (!err)
+ err = err2;
+
+ err2 = filemap_fdatawait(mapping);
+ if (!err)
+ err = err2;
+
+ /* check if data is dirty */
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & I_DIRTY)
+ need_write_inode_now = 1;
+ spin_unlock(&inode->i_lock);
+
+ if (need_write_inode_now) {
+ err2 = write_inode_now(inode, 1);
+ if (!err)
+ err = err2;
+ }
+ else
+ inode_sync_wait(inode);
+
+ return err;
+}
+
+
+/*
+ * Write to a file through the page cache.
+ *
+ * We currently put everything into the page cache prior to writing it.
+ * This is not a problem when writing full pages. With partial pages,
+ * however, we first have to read the data into the cache, then
+ * dirty the page, and finally schedule it for writing. Alternatively, we
+ * could write-through just the portion of data that would go into that
+ * page, but that would kill performance for applications that write data
+ * line by line, and it's prone to race conditions.
+ *
+ * Note that this routine doesn't try to keep track of dirty pages. Each
+ * file system has to do this all by itself, unfortunately.
+ * okir@monad.swb.de
+ */
+ssize_t
+ext2_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
+{
+ struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
+ struct inode *inode = mapping->host;
+ unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur, written, last_index; /* last page index */
+ loff_t pos;
+ long status;
+ int err;
+ unsigned bytes;
+ u32 comprblk_mask=0;
+ struct ext2_inode_info *ei = EXT2_I(inode);
+
+ if (!(ei->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL))
+#undef DUD //mw: I think this is a buggy bug-fix
+#ifdef DUD
+ || (count < inode->i_sb->s_blocksize)
+#endif
+ )
+ {
+ return do_sync_write(file, buf, count, ppos);
+ }
+
+ if ((ssize_t) count < 0)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_READ, buf, count))
+ return -EFAULT;
+
+#ifdef EXT2_COMPR_REPORT_MUTEX
+ printk(KERN_DEBUG "EXT2_FILE_WRITE_LOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino );
+#endif
+ mutex_lock(&inode->i_mutex);
+	/* mw: down_read(&inode->i_alloc_sem);  // as used by ocfs2 TLL 02/21/07
+	   was removed with kernel 3.1 */
+ atomic_inc(&inode->i_dio_count);
+
+ pos = *ppos;
+ err = -EINVAL;
+ if (pos < 0)
+ goto out;
+
+ written = 0;
+
+ /* FIXME: this is for backwards compatibility with 2.4 */
+ if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
+ {
+ pos = inode->i_size;
+ }
+
+ /*
+ * Check whether we've reached the file size limit.
+ */
+ err = -EFBIG;
+
+ if (limit != RLIM_INFINITY) {
+ if (pos >= limit) {
+ send_sig(SIGXFSZ, current, 0);
+ goto out;
+ }
+ if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) {
+ /* send_sig(SIGXFSZ, current, 0); */
+ count = limit - (u32)pos;
+ }
+ }
+
+ /*
+ * LFS rule
+ */
+ if ( pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
+ if (pos >= MAX_NON_LFS) {
+ send_sig(SIGXFSZ, current, 0);
+ goto out;
+ }
+ if (count > MAX_NON_LFS - (u32)pos) {
+ /* send_sig(SIGXFSZ, current, 0); */
+ count = MAX_NON_LFS - (u32)pos;
+ }
+ }
+
+ /*
+ * Are we about to exceed the fs block limit ?
+ *
+ * If we have written data it becomes a short write
+ * If we have exceeded without writing data we send
+ * a signal and give them an EFBIG.
+ *
+ * Linus frestrict idea will clean these up nicely..
+ */
+ if (!S_ISBLK(inode->i_mode)) {
+ if (pos >= inode->i_sb->s_maxbytes) {
+ if (count || pos > inode->i_sb->s_maxbytes) {
+ send_sig(SIGXFSZ, current, 0);
+ err = -EFBIG;
+ goto out;
+ }
+ /* zero-length writes at ->s_maxbytes are OK */
+ }
+
+ if (pos + count > inode->i_sb->s_maxbytes)
+ count = inode->i_sb->s_maxbytes - pos;
+ } else {
+ if (bdev_read_only(inode->i_sb->s_bdev)) {
+ err = -EPERM;
+ goto out;
+ }
+ if (pos >= inode->i_size) {
+ if (count || pos > inode->i_size) {
+ err = -ENOSPC;
+ goto out;
+ }
+ }
+
+ if (pos + count > inode->i_size)
+ {
+ count = inode->i_size - pos;
+ }
+ }
+
+ err = 0;
+ if (count == 0)
+ goto out;
+
+ status = 0;
+
+ if (file->f_flags & O_DIRECT)
+ {
+ err = -EINVAL;
+ goto out;
+ }
+ /*
+ * We must still check for EXT2_ECOMPR_FL, as it may have been
+ * set after we got the write permission to this file.
+ */
+ if ((ei->i_flags & (EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL)) == (EXT2_ECOMPR_FL | 0))
+ {
+ err = -EXT2_ECOMPR;
+ goto out;
+ }
+
+ should_remove_suid(file->f_dentry);
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+
+ if ((pos+count) > inode->i_size)
+ last_index = (pos+count-1) >> PAGE_CACHE_SHIFT;
+ else
+ last_index = (inode->i_size-1) >> PAGE_CACHE_SHIFT;
+
+ comprblk_mask = ei->i_flags | ~EXT2_COMPRBLK_FL;
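+	/* comprblk_mask is all-ones iff EXT2_COMPRBLK_FL was already set;
+	 * "ei->i_flags &= comprblk_mask" near the end of this function then
+	 * clears the flag again only if we raised it ourselves below */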
+
+ //mw: now do it cluster-wise
+ do {
+ //unsigned long index, offset, clusters_page_index0,
+ unsigned long index, nextClusterFirstByte, cluster_compressed=0;
+ u32 cluster=0;
+ status = -ENOMEM; /* we'll assign it later anyway */
+
+#ifdef EXT2_COMPRESS_WHEN_CLU
+ ei->i_flags |= EXT2_COMPRBLK_FL;
+ assert( (file->f_flags & O_DIRECT) == 0);
+ assert(mapping_mapped(inode->i_mapping) == 0);
+#endif
+
+ index = pos >> PAGE_CACHE_SHIFT; /*mw: pageindex (start)*/
+ cluster = ext2_page_to_cluster(inode, index);
+
+ /*
+ * We decompress the cluster if needed, and write
+ * the data as normal. The cluster will be
+ * compressed again when the inode is cleaned up.
+ */
+ if ((comprblk_mask == ~(u32)0)
+ && !(ei->i_flags & EXT2_NOCOMPR_FL)) {
+			/* CONSPICUOUS 2 */
+ /* assert (block == pos >> inode->i_sb->s_blocksize_bits); */
+
+ cluster_compressed = ext2_cluster_is_compressed_fn(inode, cluster);
+ if (cluster_compressed < 0) {
+ if (! written)
+ written = cluster_compressed;
+ break;
+ }
+ }
+
+ if (cluster_compressed > 0) {
+ /* Here, decompression take place */
+ cluster_compressed = ext2_decompress_cluster(inode, cluster);
+ if (cluster_compressed < 0) {
+ if (! written) {
+ written = cluster_compressed;
+ }
+ break;
+ }
+ }
+
+ nextClusterFirstByte = (ext2_cluster_page0(inode, cluster+1) * PAGE_CACHE_SIZE);
+ bytes = nextClusterFirstByte - pos; /*mw: bytes todo in this cluster*/
+ if (bytes > count) {
+ bytes = count; /*mw: if end of data*/
+ }
+
+#ifdef EXT2_COMPR_DEBUG
+ //assert we stay inside the cluster!
+ {
+ int endpos;
+ int endindex;
+ int endcluster;
+ unsigned long thisClusterFirstByte;
+ int relstart, relend, startblock, endblock;
+
+ thisClusterFirstByte = (ext2_cluster_page0(inode, cluster) * PAGE_CACHE_SIZE);
+
+ relstart = pos - thisClusterFirstByte;
+ relend = bytes + relstart;
+
+ startblock = relstart >> 10;
+ endblock = relend >> 10;
+
+
+ endpos = pos + bytes;
+ //printk("do_sync_write cluster %d: inode:%lu, \t start:%i(%i), end:%i(%i), \t ccount:%d \t tcount:%d\n", cluster , inode->i_ino, relstart, startblock, relend , endblock, (int)bytes, count);
+ endindex = (endpos-1) >> PAGE_CACHE_SHIFT; /*mw: pageindex (start)*/
+ endcluster = ext2_page_to_cluster(inode, endindex);
+ assert(cluster == endcluster);
+ }
+#endif
+
+		//mw: must unlock here, do_sync_write() will acquire the mutex again
+ mutex_unlock(&inode->i_mutex);
+
+ //mw: this is pretty clever: we use the generic method now :-)
+ //printk("do_sync_write cluster %d, mapped:%i\n", cluster, mapping_mapped(inode->i_mapping));
+ //status = do_sync_write_nolock(file, buf, bytes, &pos); //without locking mutex
+ status = do_sync_write(file, buf, bytes, &pos); //with locking mutex
+ assert(status>=0);
+
+ mutex_lock(&inode->i_mutex);
+
+ written += status;
+ count -= status;
+ buf += status;
+
+#ifdef EXT2_COMPRESS_WHEN_CLU
+ assert (ei->i_flags & EXT2_COMPRBLK_FL);
+ if ((ei->i_flags & EXT2_COMPR_FL)
+ && (ext2_offset_is_clu_boundary(inode, pos)) ) {
+
+ if (mapping_mapped(inode->i_mapping) == 0 )
+ /*
+			 * Pierre Peiffer: For mapped files (via mmap, I mean),
+			 * compression will occur when releasing the file.
+			 * We must, in this case, prevent the pages (possibly
+			 * mapped by a process) from being compressed under them.
+ */
+ {
+ int error;
+ assert(mapping_mapped(inode->i_mapping) == 0);
+ error = ext2_compress_cluster(inode, cluster);
+ /*if (ext2_cluster_is_compressed_fn(inode, cluster))
+ ext2_decompress_cluster(inode, cluster);*/
+ assert(mapping_mapped(inode->i_mapping) == 0);
+ /*
+ * Actually, raising write_error may be a
+ * mistake. For example,
+ * ext2_cleanup_compressed_cluster() doesn't
+ * usually return any errors to user. todo:
+ * Have a look at ext2_compress_cluster, and
+ * check whether its errors are such that they
+ * should be returned to user. Some of the
+ * will be, of course, but it might be
+ * possible for it to return without
+ * change.
+ */
+ if (error > 0)
+ comprblk_mask = ~(u32)0;
+ } else {
+#ifdef EXT2_COMPR_REPORT
+ char bdn[BDEVNAME_SIZE];
+ bdevname(inode->i_sb->s_bdev, bdn);
+#endif
+
+ trace_e2c("ext2_file_write: (dev. %s): "
+ "ino=%ld, cluster=%d: file mapped, does "
+ "not compress cluster\n",
+ bdn, inode->i_ino, cluster);
+ ei->i_flags |= EXT2_DIRTY_FL;
+ ei->i_compr_flags |= EXT2_CLEANUP_FL;
+ }
+ }
+#endif
+
+ } while (count);
+ *ppos = pos;
+
+ /*
+ * For now, when the user asks for O_SYNC, we'll actually
+ * provide O_DSYNC.
+ */
+ if (status >= 0) {
+ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
+ /*if (ei->i_compr_flags & EXT2_OSYNC_INODE) {
+ osync_already = 1;
+ } else {
+ osync_already = 0;
+ ei->i_compr_flags |= EXT2_OSYNC_INODE;
+ }*/
+ /* Should 2nd arg be inode->i_mapping? */
+ status = ex_generic_osync_inode(inode, file->f_mapping
+ /*, OSYNC_METADATA|OSYNC_DATA*/);
+ /*if (osync_already == 0) {
+ ei->i_compr_flags &= ~EXT2_OSYNC_INODE;
+ }*/
+ }
+ }
+
+ err = written ? written : status;
+
+# ifdef EXT2_COMPRESS_WHEN_CLU
+ //mw: ext2_compress_cluster() might remove EXT2_COMPRBLK_FL
+ //if the file does not compress at all. this is NO error: remove next line?
+ //assert (ei->i_flags & EXT2_COMPRBLK_FL);
+
+ ei->i_flags &= comprblk_mask;
+ if ( (ei->i_flags & EXT2_COMPR_FL)
+ && (!ext2_offset_is_clu_boundary(inode, pos)) )
+ {
+ ei->i_flags |= EXT2_DIRTY_FL;
+ ei->i_compr_flags |= EXT2_CLEANUP_FL;
+ }
+
+# else
+ if (ei->i_flags & EXT2_COMPR_FL) {
+ ei->i_flags |= EXT2_DIRTY_FL;
+ ei->i_compr_flags |= EXT2_CLEANUP_FL;
+ }
+# endif
+out:
+
+#ifdef EXT2_COMPR_REPORT_MUTEX
+ printk(KERN_DEBUG "EXT2_FILE_WRITE_UNLOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino);
+#endif
+	/* mw: up_read(&inode->i_alloc_sem);  // as used by ocfs2 TLL 02/21/07
+	   was removed with kernel 3.1 */
+ inode_dio_done(inode);
+ mutex_unlock(&inode->i_mutex);
+ return err;
+}
+
+/*
+ * Called when an inode is about to be open.
+ * We use this to disallow opening RW large files on 32bit systems if
+ * the caller didn't specify O_LARGEFILE. On 64bit systems we force
+ * on this flag in sys_open.
+ * Prevent opening compressed file with O_DIRECT.
+ */
+static int ext2_file_open(struct inode * inode, struct file * filp)
+{
+ if ((filp->f_flags & O_DIRECT) && (EXT2_I(inode)->i_flags &
+ (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)))
+ return -EINVAL;
+ if (!(filp->f_flags & O_LARGEFILE) && inode->i_size > MAX_NON_LFS)
+ return -EFBIG;
+
+ return 0;
+ }
+#endif /* CONFIG_EXT2_COMPRESS*/
+
/*
* We have mostly NULL's here: the current defaults are ok for
* the ext2 filesystem.
@@ -63,7 +559,12 @@ int ext2_fsync(struct file *file, loff_t
const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
+#ifdef CONFIG_EXT2_COMPRESS
+ .write = ext2_file_write,
+#else
.write = do_sync_write,
+#endif
+
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.unlocked_ioctl = ext2_ioctl,
@@ -71,7 +572,11 @@ const struct file_operations ext2_file_o
.compat_ioctl = ext2_compat_ioctl,
#endif
.mmap = generic_file_mmap,
+#ifdef CONFIG_EXT2_COMPRESS
+ .open = ext2_file_open,
+#else
.open = dquot_file_open,
+#endif
.release = ext2_release_file,
.fsync = ext2_fsync,
.splice_read = generic_file_splice_read,
--- linux-3.2-rc5/fs/ext2/ioctl.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/ioctl.c 2011-12-13 14:22:47.855976282 +0100
@@ -7,7 +7,14 @@
* Universite Pierre et Marie Curie (Paris VI)
*/
+#ifdef CONFIG_EXT2_COMPRESS
+#include <linux/fs.h>
+#include <linux/ext2_fs_c.h>
+#include <linux/kmod.h>
+#include <linux/stat.h>
+#else
#include "ext2.h"
+#endif
#include <linux/capability.h>
#include <linux/time.h>
#include <linux/sched.h>
@@ -17,6 +24,65 @@
#include <asm/uaccess.h>
+#ifdef CONFIG_EXT2_COMPRESS
+
+#ifndef MIN
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+#ifdef CONFIG_GZ_HACK
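+/* Heuristic: names ending in .gz (or .?gz), .tgz, .png, .jpg, .bz2 or .mng
+ * denote data that is almost certainly already compressed, so setting the
+ * compress flag is silently skipped for such files (see the ioctl below). */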
+static int check_name(struct inode *ino)
+{
+ struct dentry *dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias);
+ if (dentry)
+ if (
+
+ (dentry->d_name.len >= 4) &&
+ (((dentry->d_name.name[dentry->d_name.len - 2] == 'g')
+ && (dentry->d_name.name[dentry->d_name.len - 1] == 'z')
+ && ((dentry->d_name.name[dentry->d_name.len - 3] == '.')
+ || (dentry->d_name.name[dentry->d_name.len - 4] == '.')))
+
+ || ((dentry->d_name.name[dentry->d_name.len - 3] == 't')
+ && (dentry->d_name.name[dentry->d_name.len - 2] == 'g')
+ && (dentry->d_name.name[dentry->d_name.len - 1] == 'z')
+ && (dentry->d_name.name[dentry->d_name.len - 4] == '.')
+ && (dentry->d_name.len >= 5))
+
+ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'p')
+ && (dentry->d_name.name[dentry->d_name.len - 2] == 'n')
+ && (dentry->d_name.name[dentry->d_name.len - 1] == 'g')
+ && (dentry->d_name.name[dentry->d_name.len - 4] == '.')
+ && (dentry->d_name.len >= 5))
+
+ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'j')
+ && (dentry->d_name.name[dentry->d_name.len - 2] == 'p')
+ && (dentry->d_name.name[dentry->d_name.len - 1] == 'g')
+ && (dentry->d_name.name[dentry->d_name.len - 4] == '.')
+ && (dentry->d_name.len >= 5))
+
+ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'b')
+ && (dentry->d_name.name[dentry->d_name.len - 2] == 'z')
+ && (dentry->d_name.name[dentry->d_name.len - 1] == '2')
+ && (dentry->d_name.name[dentry->d_name.len - 4] == '.')
+ && (dentry->d_name.len >= 5))
+
+ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'm')
+ && (dentry->d_name.name[dentry->d_name.len - 2] == 'n')
+ && (dentry->d_name.name[dentry->d_name.len - 1] == 'g')
+ && (dentry->d_name.name[dentry->d_name.len - 4] == '.')
+ && (dentry->d_name.len >= 5))
+ )
+ ) {
+ return 1;
+ }
+ return 0;
+}
+#endif
+#endif
+
+
+
long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = filp->f_dentry->d_inode;
@@ -24,6 +90,10 @@ long ext2_ioctl(struct file *filp, unsig
unsigned int flags;
unsigned short rsv_window_size;
int ret;
+#ifdef CONFIG_EXT2_COMPRESS
+ unsigned long datum;
+ int err;
+#endif
ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
@@ -75,7 +145,127 @@ long ext2_ioctl(struct file *filp, unsig
}
flags = flags & EXT2_FL_USER_MODIFIABLE;
+#ifdef CONFIG_EXT2_COMPRESS
+ if (S_ISREG (inode->i_mode) || S_ISDIR (inode->i_mode)) {
+
+ /* pjm 1998-01-14: In previous versions of
+ e2compr, the kernel forbade raising
+ EXT2_ECOMPR_FL from userspace. I can't
+ think of any purpose for forbidding this,
+ and I find it useful to raise
+ EXT2_ECOMPR_FL for testing purposes, so
+ I've removed the forbidding code. */
+ if (S_ISREG (inode->i_mode)
+ && (EXT2_NOCOMPR_FL
+			    & (flags ^ ei->i_flags))) { // mw hint: ^ is an exclusive OR
+ /* NOCOMPR_FL can only be changed if
+ nobody else has the file opened. */
+ /* pjm 1998-02-16: inode->i_count is
+ useless to us because only dentries
+ use inodes now. Unfortunately,
+ there isn't an easy way of finding
+ the equivalent. We'd have to go
+ through all dentries using the
+ inode, and sum their d_count
+ values. Rather than do that, I'd
+ rather get rid of the exclusion
+ constraint. todo. */
+ //printk("i_count: %i\n", atomic_read(&inode->i_count));
+ //if (atomic_read(&inode->i_count) > 1)
+ //if (0)
+ if (ext2_get_dcount(inode) > 1)
+ {
+ mutex_unlock(&inode->i_mutex); /*mw*/
+ return -ETXTBSY;
+ }
+ else {
+ /* pjm 970429: Discarding
+ cached pages is not very
+ clean, but should work. */
+ /* pjm 980114: Not quite. We
+ should also sync any
+ mappings to buffers first.
+ This isn't very important,
+ as none of the current
+ e2compr programs can
+ trigger this, but todo. */
+ invalidate_remote_inode (inode);
+ }
+ }
+
+ if (EXT2_COMPR_FL
+ & (flags ^ ei->i_flags)) {
+ if (flags & EXT2_COMPR_FL) {
+ if (ei->i_flags & EXT2_COMPRBLK_FL) {
+ /* There shouldn't actually be any
+ compressed blocks, AFAIK. However,
+ this is still possible because sometimes
+ COMPRBLK gets raised just to stop
+ us changing cluster size at the wrong
+ time.
+
+ todo: Call a function that just
+ checks that there are no compressed
+ clusters, and prints a warning if any are
+ found. */
+ } else {
+ int bits = MIN(EXT2_DEFAULT_LOG2_CLU_NBLOCKS,
+ (EXT2_LOG2_MAX_CLUSTER_BYTES
+ - inode->i_sb->s_blocksize_bits));
+
+ ei->i_log2_clu_nblocks = bits;
+ ei->i_clu_nblocks = 1 << bits;
+ }
+ ei->i_compr_method = EXT2_DEFAULT_COMPR_METHOD;
+ if (S_ISREG (inode->i_mode)) {
+ //compress
+#ifdef CONFIG_GZ_HACK
+ /* mw: check for .gz files and similar.
+ * I think this is the cleverest place to
+ * reject such files: they remain regular,
+ * uncompressed files and thus can be read
+ * bypassing all the compression code (= fast) :-).
+ * And it seems to save space... somehow */
+ if (check_name (inode))
+ {
+ //printk("non-compressable file extension\n");
+ mutex_unlock(&inode->i_mutex);
+ return 0;
+ }
+#endif
+ //set flags to trigger compression later on
+ flags |= EXT2_DIRTY_FL;
+ ei->i_compr_flags |= EXT2_CLEANUP_FL;
+ }
+ } else if (S_ISREG (inode->i_mode)) {
+ if (ei->i_flags & EXT2_COMPRBLK_FL) {
+ int err;
+
+ if (ext2_get_dcount(inode) > 1){
+ mutex_unlock(&inode->i_mutex); //mw
+ return -ETXTBSY;
+ }
+ err = ext2_decompress_inode(inode);
+ if (err)
+ {
+ mutex_unlock(&inode->i_mutex); //mw
+ return err;
+ }
+ }
+ ei->i_flags &= ~EXT2_DIRTY_FL;
+ ei->i_compr_flags &= ~EXT2_CLEANUP_FL;
+ }
+ }
+ }
+#endif
flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
+#ifdef CONFIG_EXT2_COMPRESS
+ /* bug fix: scrub 'B' flag from uncompressed files TLL 02/28/07 */
+ if (!(flags & EXT2_COMPR_FL) && (flags & EXT2_COMPRBLK_FL) )
+ {
+ flags &= ~EXT2_COMPRBLK_FL;
+ }
+#endif
ei->i_flags = flags;
mutex_unlock(&inode->i_mutex);
@@ -148,6 +338,184 @@ setflags_out:
mnt_drop_write(filp->f_path.mnt);
return 0;
}
+#ifdef CONFIG_EXT2_COMPRESS
+ case EXT2_IOC_GETCOMPRMETHOD: /* Result means nothing if COMPR_FL is not set */
+ return put_user (ei->i_compr_method, (long *) arg);
+ case EXT2_IOC_SETCOMPRMETHOD:
+ if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+ return -EPERM;
+ if (IS_RDONLY (inode))
+ return -EROFS;
+ if (get_user (datum, (long*) arg))
+ return -EFAULT;
+ if (!S_ISREG (inode->i_mode) && !S_ISDIR (inode->i_mode))
+ return -ENOSYS;
+ /* todo: Allow the below, but set initial value of
+ i_compr_meth at read_inode() time (using default if
+ !/) instead of +c time. Same for cluster
+ size. */
+ if ((unsigned) datum >= EXT2_N_METHODS)
+ return -EINVAL;
+ if (ei->i_compr_method != datum) {
+ if ((ei->i_compr_method == EXT2_NEVER_METH)
+ && (ei->i_flags & EXT2_COMPR_FL))
+ return -EPERM;
+ /* If the previous method was `defer' then
+ take a look at all uncompressed clusters
+ and try to compress them. (pjm 1997-04-16) */
+ if ((ei->i_compr_method == EXT2_DEFER_METH)
+ && S_ISREG (inode->i_mode)) {
+ ei->i_flags |= EXT2_DIRTY_FL;
+ ei->i_compr_flags |= EXT2_CLEANUP_FL;
+ }
+ if ((datum == EXT2_NEVER_METH)
+ && S_ISREG (inode->i_mode)) {
+ //printk("SETCOMPR\n");
+ if ((ei->i_flags & EXT2_COMPRBLK_FL))
+ {
+ /*mw*/
+ mutex_lock(&inode->i_mutex);
+ if (ext2_get_dcount(inode) > 1){
+ mutex_unlock(&inode->i_mutex); /*mw*/
+ return -ETXTBSY;
+ }
+ err = ext2_decompress_inode(inode);
+ mutex_unlock(&inode->i_mutex);
+ if ( err < 0)
+ return err;
+ }
+ ei->i_flags &= ~EXT2_DIRTY_FL;
+ ei->i_compr_flags &= ~EXT2_CLEANUP_FL;
+ }
+ ei->i_compr_method = datum;
+ inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+ }
+#ifdef CONFIG_KMOD
+ if (!ext2_algorithm_table[ext2_method_table[datum].alg].avail) {
+ char str[32];
+
+ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[ext2_method_table[datum].alg].name);
+ request_module(str);
+ }
+#endif
+ datum = ((datum < EXT2_N_METHODS)
+ && (ext2_algorithm_table[ext2_method_table[datum].alg].avail));
+ return put_user(datum, (long *)arg);
+
+ case EXT2_IOC_GETCLUSTERBIT:
+ if (get_user (datum, (long*) arg))
+ return -EFAULT;
+ if (!S_ISREG (inode->i_mode))
+ return -ENOSYS;
+ /* We don't do `down(&inode->i_sem)' here because
+ there's no way for userspace to do the
+ corresponding up(). Userspace must rely on
+ EXT2_NOCOMPR_FL if it needs to lock. */
+ err = ext2_cluster_is_compressed (inode, datum);
+ if (err < 0)
+ return err;
+ return put_user ((err ? 1 : 0),
+ (long *) arg);
+
+ case EXT2_IOC_RECOGNIZE_COMPRESSED:
+ if (get_user (datum, (long*) arg))
+ return -EFAULT;
+ if (!S_ISREG (inode->i_mode))
+ return -ENOSYS;
+ if (IS_RDONLY (inode))
+ return -EROFS;
+ return ext2_recognize_compressed (inode, datum);
+
+ case EXT2_IOC_GETCLUSTERSIZE:
+ /* Result means nothing if COMPR_FL is not set (until
+ SETCLUSTERSIZE w/o COMPR_FL is implemented;
+ todo). */
+ if (!S_ISREG (inode->i_mode)
+ && !S_ISDIR (inode->i_mode))
+ return -ENOSYS;
+ return put_user (ei->i_clu_nblocks, (long *) arg);
+
+ case EXT2_IOC_GETFIRSTCLUSTERSIZE:
+ /* Result means nothing if COMPR_FL is not set (until
+ SETCLUSTERSIZE w/o COMPR_FL is implemented;
+ todo). */
+ if (!S_ISREG (inode->i_mode)
+ && !S_ISDIR (inode->i_mode))
+ return -ENOSYS;
+ return put_user (ext2_first_cluster_nblocks(inode), (long *) arg);
+
+ case EXT2_IOC_SETCLUSTERSIZE:
+ if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+ return -EPERM;
+ if (IS_RDONLY (inode))
+ return -EROFS;
+ if (get_user (datum, (long *) arg))
+ return -EFAULT;
+ if (!S_ISREG (inode->i_mode)
+ && !S_ISDIR (inode->i_mode))
+ return -ENOSYS;
+
+ /* These are the only possible cluster sizes. The
+ cluster size must be a power of two so that
+ clusters don't straddle address (aka indirect)
+ blocks. At the moment, the upper limit is constrained
+ by how much memory is allocated for de/compression.
+ Also, the gzip algorithms have some optimisations
+ that assume that the input is no more than 32KB,
+ and in compress.c we would need to zero more bits
+ of head->holemap. (In previous releases, the file
+ format was limited to 32 blocks and under 64KB.) */
+// #if EXT2_MAX_CLUSTER_BLOCKS > 32 || EXT2_MAX_CLUSTER_NBYTES > 32768
+// # error "This code not updated for cluster size yet."
+// #endif
+ switch (datum) {
+ case (1 << 2): datum = 2; break;
+ case (1 << 3): datum = 3; break;
+ case (1 << 4): datum = 4; break;
+ case (1 << 5): datum = 5; break;
+ default: return -EINVAL;
+ }
+
+ assert (ei->i_clu_nblocks == (1 << ei->i_log2_clu_nblocks));
+ if (datum == ei->i_log2_clu_nblocks)
+ return 0;
+
+ if (ei->i_flags & EXT2_ECOMPR_FL)
+ return -EPERM;
+ if (!(ei->i_flags & EXT2_COMPR_FL))
+ return -ENOSYS;
+
+ /* We currently lack a mechanism to change the cluster
+ size if there are already some compressed clusters.
+ The compression must be done in userspace
+ (e.g. with the e2compress program) instead. */
+ if (ei->i_flags & EXT2_COMPRBLK_FL)
+ return -ENOSYS;
+
+ if (datum + inode->i_sb->s_blocksize_bits
+ > EXT2_LOG2_MAX_CLUSTER_BYTES)
+ return -EINVAL;
+
+ ei->i_log2_clu_nblocks = datum;
+ ei->i_clu_nblocks = 1 << datum;
+ inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+ return 0;
+
+ case EXT2_IOC_GETCOMPRRATIO:
+ if (!S_ISREG (inode->i_mode))
+ return -ENOSYS;
+ if (ei->i_flags & EXT2_ECOMPR_FL)
+ return -EPERM;
+ if ((long) (datum = ext2_count_blocks (inode)) < 0)
+ return datum;
+ if ((err = put_user ((long) datum, (long*) arg)))
+ return err;
+ return put_user ((long) inode->i_blocks, (long*) arg + 1);
+
+
+#endif
default:
return -ENOTTY;
}
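
For reference, a minimal userspace sketch of the ioctl interface added
above, assuming the EXT2_IOC_* constants are exported to userspace via
<linux/ext2_fs_c.h> (the ext2_fs.h hunk below includes that header for
non-kernel builds). Illustration only, not part of the patch:

    /* e2c-info.c: query the compression state of a file (sketch) */
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/ext2_fs_c.h>    /* EXT2_IOC_* (assumed) */

    int main(int argc, char **argv)
    {
            long datum;
            int fd = open(argv[1], O_RDONLY);

            if (fd < 0)
                    return 1;
            if (ioctl(fd, EXT2_IOC_GETCOMPRMETHOD, &datum) == 0)
                    printf("compression method index: %ld\n", datum);
            if (ioctl(fd, EXT2_IOC_GETCLUSTERSIZE, &datum) == 0)
                    printf("cluster size: %ld blocks\n", datum);
            close(fd);
            return 0;
    }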
--- linux-3.2-rc5/fs/ext2/ext2.h 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/ext2/ext2.h 2011-12-13 14:22:47.855976282 +0100
@@ -37,6 +37,12 @@ struct ext2_inode_info {
struct ext2_block_alloc_info *i_block_alloc_info;
__u32 i_dir_start_lookup;
+#ifdef CONFIG_EXT2_COMPRESS
+ __u8 i_log2_clu_nblocks;
+ __u8 i_clu_nblocks;
+ __u8 i_compr_method;
+ __u8 i_compr_flags;
+#endif
#ifdef CONFIG_EXT2_FS_XATTR
/*
* Extended attributes can be read independently of the main file
@@ -126,6 +132,7 @@ extern void ext2_set_inode_flags(struct
extern void ext2_get_inode_flags(struct ext2_inode_info *);
extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
+extern void ext2_truncate_blocks(struct inode *inode, loff_t offset);
/* ioctl.c */
extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
--- linux-3.2-rc5/include/linux/ext2_fs.h 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/include/linux/ext2_fs.h 2011-12-13 14:22:47.856976313 +0100
@@ -87,6 +87,10 @@ static inline struct ext2_sb_info *EXT2_
/*
* Macro-instructions used to manage several block sizes
*/
+#define EXT2_GRAIN_SIZE 1024
+/* Minimum allocation unit. This is used in fs/ext2/compress.c to
+ check compr_len validity wrt (uncompressed) len. This definition
+ will probably need to be changed when fragments are implemented. */
#define EXT2_MIN_BLOCK_SIZE 1024
#define EXT2_MAX_BLOCK_SIZE 4096
#define EXT2_MIN_BLOCK_LOG_SIZE 10
@@ -178,9 +182,10 @@ struct ext2_group_desc
#define EXT2_NODUMP_FL FS_NODUMP_FL /* do not dump file */
#define EXT2_NOATIME_FL FS_NOATIME_FL /* do not update atime */
/* Reserved for compression usage... */
-#define EXT2_DIRTY_FL FS_DIRTY_FL
+#define EXT2_DIRTY_FL FS_DIRTY_FL /* Needs compressing; see Readme.e2compr */
#define EXT2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */
#define EXT2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */
+#define EXT2_NOCOMPR_FL FS_NOCOMP_FL /* Access raw data */
#define EXT2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */
/* End compression flags --- maybe not all used */
#define EXT2_BTREE_FL FS_BTREE_FL /* btree format dir */
@@ -342,6 +347,7 @@ struct ext2_inode {
#define EXT2_MOUNT_MINIX_DF 0x000080 /* Mimics the Minix statfs */
#define EXT2_MOUNT_NOBH 0x000100 /* No buffer_heads */
#define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */
+#define EXT2_MOUNT_FORCE_COMPAT 0x000400 /* Mount despite incompatibilities */
#define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */
#define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */
#define EXT2_MOUNT_XIP 0x010000 /* Execute in place */
@@ -507,8 +513,14 @@ struct ext2_super_block {
#define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff
#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
+#ifdef CONFIG_EXT2_COMPRESS
+#define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_COMPRESSION| \
+ EXT2_FEATURE_INCOMPAT_FILETYPE| \
+ EXT2_FEATURE_INCOMPAT_META_BG)
+#else
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
EXT2_FEATURE_INCOMPAT_META_BG)
+#endif
#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT2_FEATURE_RO_COMPAT_BTREE_DIR)
@@ -588,4 +600,16 @@ enum {
~EXT2_DIR_ROUND)
#define EXT2_MAX_REC_LEN ((1<<16)-1)
+#ifndef __KERNEL__
+/* This simplifies things for user programs (notably e2fsprogs) that
+ must compile whether or not <linux/ext2_fs_c.h> is present, but
+ would prefer to include it. Presumably the file is present if the
+ user has this version of ext2_fs.h. */
+
+# /* Do not remove this comment. */ include <linux/ext2_fs_c.h>
+
+/* The comment between `#' and `include' prevents mkdep from generating
+ a dependency on ext2_fs_c.h. */
+#endif
+
#endif /* _LINUX_EXT2_FS_H */
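
Because the compression flags above alias the generic FS_* attribute
bits, userspace can inspect them with the standard FS_IOC_GETFLAGS
ioctl from <linux/fs.h>. A minimal sketch (illustration only):

    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(int argc, char **argv)
    {
            int flags = 0;
            int fd = open(argv[1], O_RDONLY);

            if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
                    return 1;
            printf("COMPR:%d COMPRBLK:%d NOCOMP:%d ECOMPR:%d\n",
                   !!(flags & FS_COMPR_FL), !!(flags & FS_COMPRBLK_FL),
                   !!(flags & FS_NOCOMP_FL), !!(flags & FS_ECOMPR_FL));
            return 0;
    }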
--- linux-3.2-rc5/fs/fcntl.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/fs/fcntl.c 2011-12-13 14:22:47.857976344 +0100
@@ -25,6 +25,12 @@
#include <asm/siginfo.h>
#include <asm/uaccess.h>
+#ifdef CONFIG_EXT2_COMPRESS
+//mw: deny O_DIRECT on file with compression
+#include <linux/ext2_fs.h>
+#include "ext2/ext2.h"
+#endif
+
void set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
@@ -171,6 +177,16 @@ static int setfl(int fd, struct file * f
if (!filp->f_mapping || !filp->f_mapping->a_ops ||
!filp->f_mapping->a_ops->direct_IO)
return -EINVAL;
+
+#ifdef CONFIG_EXT2_COMPRESS
+ //mw: if we have a compressed ext2 file: deny!
+ // TODO: maybe check fs-type first!
+ //assert(!(EXT2_I(inode)->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)));
+ if (EXT2_I(inode)->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL))
+ {
+ return -EINVAL;
+ }
+#endif
}
if (filp->f_op && filp->f_op->check_flags)
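
The observable effect of the setfl() hunk above: requesting O_DIRECT on
a file whose inode carries EXT2_COMPR_FL or EXT2_COMPRBLK_FL should now
fail with EINVAL. A minimal demonstration, assuming the file lives on
an e2compr-enabled ext2 (illustration only, not part of the patch):

    #define _GNU_SOURCE             /* for O_DIRECT */
    #include <stdio.h>
    #include <errno.h>
    #include <fcntl.h>

    int main(int argc, char **argv)
    {
            int fd = open(argv[1], O_RDWR);
            int flags;

            if (fd < 0)
                    return 1;
            flags = fcntl(fd, F_GETFL);
            if (fcntl(fd, F_SETFL, flags | O_DIRECT) < 0 && errno == EINVAL)
                    printf("O_DIRECT refused; file is presumably compressed\n");
            return 0;
    }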
--- linux-3.2-rc5/mm/truncate.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/mm/truncate.c 2011-12-13 14:22:47.858976376 +0100
@@ -22,6 +22,9 @@
#include <linux/cleancache.h>
#include "internal.h"
+#ifdef CONFIG_EXT2_COMPRESS
+#include <linux/ext2_fs_c.h>
+#endif
/**
* do_invalidatepage - invalidate part or all of a page
@@ -551,6 +554,11 @@ void truncate_pagecache(struct inode *in
* unmap_mapping_range call must be made for correctness.
*/
unmap_mapping_range(mapping, holebegin, 0, 1);
+#ifdef CONFIG_EXT2_COMPRESS
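+	/* For ext2 inodes that carry compressed clusters (COMPRBLK_FL),
+	 * leave the page cache untouched here; the fs-specific truncate
+	 * path is expected to take care of those pages itself. */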
+ if ((inode->i_op && inode->i_op->truncate) &&
+ ((strcmp(inode->i_sb->s_type->name, "ext2") != 0) ||
+ (!(EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL))))
+#endif
truncate_inode_pages(mapping, newsize);
unmap_mapping_range(mapping, holebegin, 0, 1);
}
--- linux-3.2-rc5/mm/swapfile.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/mm/swapfile.c 2011-12-13 14:22:47.859976408 +0100
@@ -31,6 +31,10 @@
#include <linux/memcontrol.h>
#include <linux/poll.h>
#include <linux/oom.h>
+#ifdef CONFIG_EXT2_COMPRESS
+#include <linux/ext2_fs_c.h>
+#endif
+
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -2056,6 +2060,24 @@ SYSCALL_DEFINE2(swapon, const char __use
}
inode = mapping->host;
+
+#ifdef CONFIG_EXT2_COMPRESS
+ /*
+ * Swapping not supported for e2compressed files.
+ * (Actually, this code is pretty useless because we
+ * should get an error later anyway because of the
+ * holes.) Yes, this is pretty horrible code... I'll
+ * improve it later.
+ */
+ if ((strcmp(inode->i_sb->s_type->name, "ext2") == 0)
+ && (EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL))
+ {
+ printk("Assertion: Error NO swap SWAP implemented!\n");
+ error = -EINVAL;
+ goto bad_swap;
+ }
+#endif
+
/* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
error = claim_swapfile(p, inode);
if (unlikely(error))
--- linux-3.2-rc5/mm/filemap.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/mm/filemap.c 2011-12-13 14:22:47.860976440 +0100
@@ -43,6 +43,10 @@
#include <asm/mman.h>
+#ifdef CONFIG_EXT2_COMPRESS
+# include <linux/ext2_fs_c.h>
+#endif
+
/*
* Shared mappings implemented 30.11.1994. It's not fully working yet,
* though.
@@ -278,7 +282,19 @@ int filemap_fdatawait_range(struct addre
PAGECACHE_TAG_WRITEBACK,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
unsigned i;
+#ifdef CONFIG_EXT2_COMPRESS
+/*
+ * I'm not sure that this is right. It has been reworked considerably since
+ * 2.6.5. - whitpa
+ */
+ struct inode *inode = mapping->host;
+ //printk("wait_on_page_writeback_range\n");
+ if ((strcmp(inode->i_sb->s_type->name, "ext2") != 0)
+ || (atomic_read(&inode->i_mutex.count) > 0)
+ || (EXT2_I(inode)->i_compr_flags &
+ EXT2_OSYNC_INODE))
+#endif
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
@@ -1184,6 +1200,15 @@ page_ok:
}
nr = nr - offset;
+#ifdef CONFIG_EXT2_COMPRESS
+ lock_page(page);
+ // check again: is the page still uptodate after locking?
+ if(!PageUptodate(page)){
+ unlock_page(page);
+ goto page_not_up_to_date;
+ }
+#endif
+
/* If users can be writing to this page using arbitrary
* virtual addresses, take care about potential aliasing
* before reading the page on the kernel side.
@@ -1215,6 +1240,10 @@ page_ok:
offset &= ~PAGE_CACHE_MASK;
prev_offset = offset;
+#ifdef CONFIG_EXT2_COMPRESS
+ unlock_page(page);
+#endif
+
page_cache_release(page);
if (ret == nr && desc->count)
continue;
@@ -1224,7 +1253,12 @@ page_not_up_to_date:
/* Get exclusive access to the page ... */
error = lock_page_killable(page);
if (unlikely(error))
+ {
+ printk("Readpage Error: mw: page locking failed with code: %i\n", error);
+ printk("Readpage Error: mw: might happen as page was locked 'killable'\n");
+ printk("Readpage Error: mw: was reading app killed?\n");
goto readpage_error;
+ }
page_not_up_to_date_locked:
/* Did it get truncated before we got the lock? */
@@ -1255,13 +1289,17 @@ readpage:
page_cache_release(page);
goto find_page;
}
+ printk("Readpage Error: fs-specific readpage failed with code: %i\n", error);
goto readpage_error;
}
if (!PageUptodate(page)) {
error = lock_page_killable(page);
if (unlikely(error))
+ {
+ printk("Readpage Error: page was not uptodate after read. page locking failed with code: %i\n", error);
goto readpage_error;
+ }
if (!PageUptodate(page)) {
if (page->mapping == NULL) {
/*
@@ -1274,6 +1312,7 @@ readpage:
unlock_page(page);
shrink_readahead_size_eio(filp, ra);
error = -EIO;
+ printk("Readpage Error: page was not uptodate after read AND page locked. failed with code: %i\n", error);
goto readpage_error;
}
unlock_page(page);
@@ -1285,6 +1324,7 @@ readpage_error:
/* UHHUH! A synchronous read error occurred. Report it */
desc->error = error;
page_cache_release(page);
+ printk("Readpage Error\n");
goto out;
no_cached_page:
--- linux-3.2-rc5/mm/page_alloc.c 2011-12-10 00:09:32.000000000 +0100
+++ linux-3.2-rc5-e2c/mm/page_alloc.c 2011-12-13 14:22:47.863976534 +0100
@@ -1733,6 +1733,8 @@ this_zone_full:
}
return page;
}
+/* mw: needed to build ext2 with e2compr as a module */
+EXPORT_SYMBOL(__pagevec_free);
/*
* Large machines with many possible nodes should not always dump per-node